1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Generic nexthop implementation |
4 | * |
5 | * Copyright (c) 2017-19 Cumulus Networks |
6 | * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> |
7 | */ |
8 | |
9 | #ifndef __LINUX_NEXTHOP_H |
10 | #define __LINUX_NEXTHOP_H |
11 | |
12 | #include <linux/netdevice.h> |
13 | #include <linux/notifier.h> |
14 | #include <linux/route.h> |
15 | #include <linux/types.h> |
16 | #include <net/ip_fib.h> |
17 | #include <net/ip6_fib.h> |
18 | #include <net/netlink.h> |
19 | |
/* Flag mask named as the user-settable set; currently only RTNH_F_ONLINK. */
#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK

/* Forward declaration so the structures below can hold pointers to it. */
struct nexthop;
23 | |
24 | struct nh_config { |
25 | u32 nh_id; |
26 | |
27 | u8 nh_family; |
28 | u8 nh_protocol; |
29 | u8 nh_blackhole; |
30 | u8 nh_fdb; |
31 | u32 nh_flags; |
32 | |
33 | int nh_ifindex; |
34 | struct net_device *dev; |
35 | |
36 | union { |
37 | __be32 ipv4; |
38 | struct in6_addr ipv6; |
39 | } gw; |
40 | |
41 | struct nlattr *nh_grp; |
42 | u16 nh_grp_type; |
43 | u16 nh_grp_res_num_buckets; |
44 | unsigned long nh_grp_res_idle_timer; |
45 | unsigned long nh_grp_res_unbalanced_timer; |
46 | bool nh_grp_res_has_num_buckets; |
47 | bool nh_grp_res_has_idle_timer; |
48 | bool nh_grp_res_has_unbalanced_timer; |
49 | |
50 | struct nlattr *nh_encap; |
51 | u16 nh_encap_type; |
52 | |
53 | u32 nlflags; |
54 | struct nl_info nlinfo; |
55 | }; |
56 | |
57 | struct nh_info { |
58 | struct hlist_node dev_hash; /* entry on netns devhash */ |
59 | struct nexthop *nh_parent; |
60 | |
61 | u8 family; |
62 | bool reject_nh; |
63 | bool fdb_nh; |
64 | |
65 | union { |
66 | struct fib_nh_common fib_nhc; |
67 | struct fib_nh fib_nh; |
68 | struct fib6_nh fib6_nh; |
69 | }; |
70 | }; |
71 | |
72 | struct nh_res_bucket { |
73 | struct nh_grp_entry __rcu *nh_entry; |
74 | atomic_long_t used_time; |
75 | unsigned long migrated_time; |
76 | bool occupied; |
77 | u8 nh_flags; |
78 | }; |
79 | |
80 | struct nh_res_table { |
81 | struct net *net; |
82 | u32 nhg_id; |
83 | struct delayed_work upkeep_dw; |
84 | |
85 | /* List of NHGEs that have too few buckets ("uw" for underweight). |
86 | * Reclaimed buckets will be given to entries in this list. |
87 | */ |
88 | struct list_head uw_nh_entries; |
89 | unsigned long unbalanced_since; |
90 | |
91 | u32 idle_timer; |
92 | u32 unbalanced_timer; |
93 | |
94 | u16 num_nh_buckets; |
95 | struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); |
96 | }; |
97 | |
98 | struct nh_grp_entry { |
99 | struct nexthop *nh; |
100 | u8 weight; |
101 | |
102 | union { |
103 | struct { |
104 | atomic_t upper_bound; |
105 | } hthr; |
106 | struct { |
107 | /* Member on uw_nh_entries. */ |
108 | struct list_head uw_nh_entry; |
109 | |
110 | u16 count_buckets; |
111 | u16 wants_buckets; |
112 | } res; |
113 | }; |
114 | |
115 | struct list_head nh_list; |
116 | struct nexthop *nh_parent; /* nexthop of group with this entry */ |
117 | }; |
118 | |
119 | struct nh_group { |
120 | struct nh_group *spare; /* spare group for removals */ |
121 | u16 num_nh; |
122 | bool is_multipath; |
123 | bool hash_threshold; |
124 | bool resilient; |
125 | bool fdb_nh; |
126 | bool has_v4; |
127 | |
128 | struct nh_res_table __rcu *res_table; |
129 | struct nh_grp_entry nh_entries[] __counted_by(num_nh); |
130 | }; |
131 | |
132 | struct nexthop { |
133 | struct rb_node rb_node; /* entry on netns rbtree */ |
134 | struct list_head fi_list; /* v4 entries using nh */ |
135 | struct list_head f6i_list; /* v6 entries using nh */ |
136 | struct list_head fdb_list; /* fdb entries using this nh */ |
137 | struct list_head grp_list; /* nh group entries using this nh */ |
138 | struct net *net; |
139 | |
140 | u32 id; |
141 | |
142 | u8 protocol; /* app managing this nh */ |
143 | u8 nh_flags; |
144 | bool is_group; |
145 | |
146 | refcount_t refcnt; |
147 | struct rcu_head rcu; |
148 | |
149 | union { |
150 | struct nh_info __rcu *nh_info; |
151 | struct nh_group __rcu *nh_grp; |
152 | }; |
153 | }; |
154 | |
/* Nexthop event types (values are implicit, starting at 0). */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
};
161 | |
/*
 * Stored in nh_notifier_info::type. The four values line up with the
 * four pointers in that structure's union (presumably selecting the
 * valid one; confirm against the notifier producers).
 */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
};
168 | |
169 | struct nh_notifier_single_info { |
170 | struct net_device *dev; |
171 | u8 gw_family; |
172 | union { |
173 | __be32 ipv4; |
174 | struct in6_addr ipv6; |
175 | }; |
176 | u8 is_reject:1, |
177 | is_fdb:1, |
178 | has_encap:1; |
179 | }; |
180 | |
181 | struct nh_notifier_grp_entry_info { |
182 | u8 weight; |
183 | u32 id; |
184 | struct nh_notifier_single_info nh; |
185 | }; |
186 | |
187 | struct nh_notifier_grp_info { |
188 | u16 num_nh; |
189 | bool is_fdb; |
190 | struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); |
191 | }; |
192 | |
193 | struct nh_notifier_res_bucket_info { |
194 | u16 bucket_index; |
195 | unsigned int idle_timer_ms; |
196 | bool force; |
197 | struct nh_notifier_single_info old_nh; |
198 | struct nh_notifier_single_info new_nh; |
199 | }; |
200 | |
201 | struct nh_notifier_res_table_info { |
202 | u16 num_nh_buckets; |
203 | struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); |
204 | }; |
205 | |
206 | struct nh_notifier_info { |
207 | struct net *net; |
208 | struct netlink_ext_ack *extack; |
209 | u32 id; |
210 | enum nh_notifier_info_type type; |
211 | union { |
212 | struct nh_notifier_single_info *nh; |
213 | struct nh_notifier_grp_info *nh_grp; |
214 | struct nh_notifier_res_table_info *nh_res_table; |
215 | struct nh_notifier_res_bucket_info *nh_res_bucket; |
216 | }; |
217 | }; |
218 | |
219 | int register_nexthop_notifier(struct net *net, struct notifier_block *nb, |
220 | struct netlink_ext_ack *extack); |
221 | int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); |
222 | void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); |
223 | void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, |
224 | bool offload, bool trap); |
225 | void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, |
226 | unsigned long *activity); |
227 | |
228 | /* caller is holding rcu or rtnl; no reference taken to nexthop */ |
229 | struct nexthop *nexthop_find_by_id(struct net *net, u32 id); |
230 | void nexthop_free_rcu(struct rcu_head *head); |
231 | |
232 | static inline bool nexthop_get(struct nexthop *nh) |
233 | { |
234 | return refcount_inc_not_zero(r: &nh->refcnt); |
235 | } |
236 | |
237 | static inline void nexthop_put(struct nexthop *nh) |
238 | { |
239 | if (refcount_dec_and_test(r: &nh->refcnt)) |
240 | call_rcu(head: &nh->rcu, func: nexthop_free_rcu); |
241 | } |
242 | |
/* Two nexthops compare equal only when they are the same object. */
static inline bool nexthop_cmp(const struct nexthop *nh1,
			       const struct nexthop *nh2)
{
	if (nh1 != nh2)
		return false;

	return true;
}
248 | |
249 | static inline bool nexthop_is_fdb(const struct nexthop *nh) |
250 | { |
251 | if (nh->is_group) { |
252 | const struct nh_group *nh_grp; |
253 | |
254 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
255 | return nh_grp->fdb_nh; |
256 | } else { |
257 | const struct nh_info *nhi; |
258 | |
259 | nhi = rcu_dereference_rtnl(nh->nh_info); |
260 | return nhi->fdb_nh; |
261 | } |
262 | } |
263 | |
264 | static inline bool nexthop_has_v4(const struct nexthop *nh) |
265 | { |
266 | if (nh->is_group) { |
267 | struct nh_group *nh_grp; |
268 | |
269 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
270 | return nh_grp->has_v4; |
271 | } |
272 | return false; |
273 | } |
274 | |
275 | static inline bool nexthop_is_multipath(const struct nexthop *nh) |
276 | { |
277 | if (nh->is_group) { |
278 | struct nh_group *nh_grp; |
279 | |
280 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
281 | return nh_grp->is_multipath; |
282 | } |
283 | return false; |
284 | } |
285 | |
/*
 * Select a nexthop for @hash starting from @nh. Defined outside this
 * header; nexthop_path_fdb_result() treats a NULL return as possible.
 */
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
287 | |
288 | static inline unsigned int nexthop_num_path(const struct nexthop *nh) |
289 | { |
290 | unsigned int rc = 1; |
291 | |
292 | if (nh->is_group) { |
293 | struct nh_group *nh_grp; |
294 | |
295 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
296 | if (nh_grp->is_multipath) |
297 | rc = nh_grp->num_nh; |
298 | } |
299 | |
300 | return rc; |
301 | } |
302 | |
303 | static inline |
304 | struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) |
305 | { |
306 | /* for_nexthops macros in fib_semantics.c grabs a pointer to |
307 | * the nexthop before checking nhsel |
308 | */ |
309 | if (nhsel >= nhg->num_nh) |
310 | return NULL; |
311 | |
312 | return nhg->nh_entries[nhsel].nh; |
313 | } |
314 | |
315 | static inline |
316 | int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, |
317 | u8 rt_family) |
318 | { |
319 | struct nh_group *nhg = rtnl_dereference(nh->nh_grp); |
320 | int i; |
321 | |
322 | for (i = 0; i < nhg->num_nh; i++) { |
323 | struct nexthop *nhe = nhg->nh_entries[i].nh; |
324 | struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); |
325 | struct fib_nh_common *nhc = &nhi->fib_nhc; |
326 | int weight = nhg->nh_entries[i].weight; |
327 | |
328 | if (fib_add_nexthop(skb, nh: nhc, nh_weight: weight, rt_family, nh_tclassid: 0) < 0) |
329 | return -EMSGSIZE; |
330 | } |
331 | |
332 | return 0; |
333 | } |
334 | |
335 | /* called with rcu lock */ |
336 | static inline bool nexthop_is_blackhole(const struct nexthop *nh) |
337 | { |
338 | const struct nh_info *nhi; |
339 | |
340 | if (nh->is_group) { |
341 | struct nh_group *nh_grp; |
342 | |
343 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
344 | if (nh_grp->num_nh > 1) |
345 | return false; |
346 | |
347 | nh = nh_grp->nh_entries[0].nh; |
348 | } |
349 | |
350 | nhi = rcu_dereference_rtnl(nh->nh_info); |
351 | return nhi->reject_nh; |
352 | } |
353 | |
354 | static inline void nexthop_path_fib_result(struct fib_result *res, int hash) |
355 | { |
356 | struct nh_info *nhi; |
357 | struct nexthop *nh; |
358 | |
359 | nh = nexthop_select_path(nh: res->fi->nh, hash); |
360 | nhi = rcu_dereference(nh->nh_info); |
361 | res->nhc = &nhi->fib_nhc; |
362 | } |
363 | |
364 | /* called with rcu read lock or rtnl held */ |
365 | static inline |
366 | struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) |
367 | { |
368 | struct nh_info *nhi; |
369 | |
370 | BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); |
371 | BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); |
372 | |
373 | if (nh->is_group) { |
374 | struct nh_group *nh_grp; |
375 | |
376 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
377 | if (nh_grp->is_multipath) { |
378 | nh = nexthop_mpath_select(nhg: nh_grp, nhsel); |
379 | if (!nh) |
380 | return NULL; |
381 | } |
382 | } |
383 | |
384 | nhi = rcu_dereference_rtnl(nh->nh_info); |
385 | return &nhi->fib_nhc; |
386 | } |
387 | |
388 | /* called from fib_table_lookup with rcu_lock */ |
389 | static inline |
390 | struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, |
391 | int fib_flags, |
392 | const struct flowi4 *flp, |
393 | int *nhsel) |
394 | { |
395 | struct nh_info *nhi; |
396 | |
397 | if (nh->is_group) { |
398 | struct nh_group *nhg = rcu_dereference(nh->nh_grp); |
399 | int i; |
400 | |
401 | for (i = 0; i < nhg->num_nh; i++) { |
402 | struct nexthop *nhe = nhg->nh_entries[i].nh; |
403 | |
404 | nhi = rcu_dereference(nhe->nh_info); |
405 | if (fib_lookup_good_nhc(nhc: &nhi->fib_nhc, fib_flags, flp)) { |
406 | *nhsel = i; |
407 | return &nhi->fib_nhc; |
408 | } |
409 | } |
410 | } else { |
411 | nhi = rcu_dereference(nh->nh_info); |
412 | if (fib_lookup_good_nhc(nhc: &nhi->fib_nhc, fib_flags, flp)) { |
413 | *nhsel = 0; |
414 | return &nhi->fib_nhc; |
415 | } |
416 | } |
417 | |
418 | return NULL; |
419 | } |
420 | |
421 | static inline bool nexthop_uses_dev(const struct nexthop *nh, |
422 | const struct net_device *dev) |
423 | { |
424 | struct nh_info *nhi; |
425 | |
426 | if (nh->is_group) { |
427 | struct nh_group *nhg = rcu_dereference(nh->nh_grp); |
428 | int i; |
429 | |
430 | for (i = 0; i < nhg->num_nh; i++) { |
431 | struct nexthop *nhe = nhg->nh_entries[i].nh; |
432 | |
433 | nhi = rcu_dereference(nhe->nh_info); |
434 | if (nhc_l3mdev_matches_dev(nhc: &nhi->fib_nhc, dev)) |
435 | return true; |
436 | } |
437 | } else { |
438 | nhi = rcu_dereference(nh->nh_info); |
439 | if (nhc_l3mdev_matches_dev(nhc: &nhi->fib_nhc, dev)) |
440 | return true; |
441 | } |
442 | |
443 | return false; |
444 | } |
445 | |
446 | static inline unsigned int fib_info_num_path(const struct fib_info *fi) |
447 | { |
448 | if (unlikely(fi->nh)) |
449 | return nexthop_num_path(nh: fi->nh); |
450 | |
451 | return fi->fib_nhs; |
452 | } |
453 | |
454 | int fib_check_nexthop(struct nexthop *nh, u8 scope, |
455 | struct netlink_ext_ack *extack); |
456 | |
457 | static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) |
458 | { |
459 | if (unlikely(fi->nh)) |
460 | return nexthop_fib_nhc(nh: fi->nh, nhsel); |
461 | |
462 | return &fi->fib_nh[nhsel].nh_common; |
463 | } |
464 | |
465 | /* only used when fib_nh is built into fib_info */ |
466 | static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) |
467 | { |
468 | WARN_ON(fi->nh); |
469 | |
470 | return &fi->fib_nh[nhsel]; |
471 | } |
472 | |
/*
 * IPv6 variants
 */

/* Declaration only; defined outside this header. */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);
478 | |
479 | /* Caller should either hold rcu_read_lock(), or RTNL. */ |
480 | static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) |
481 | { |
482 | struct nh_info *nhi; |
483 | |
484 | if (nh->is_group) { |
485 | struct nh_group *nh_grp; |
486 | |
487 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
488 | nh = nexthop_mpath_select(nhg: nh_grp, nhsel: 0); |
489 | if (!nh) |
490 | return NULL; |
491 | } |
492 | |
493 | nhi = rcu_dereference_rtnl(nh->nh_info); |
494 | if (nhi->family == AF_INET6) |
495 | return &nhi->fib6_nh; |
496 | |
497 | return NULL; |
498 | } |
499 | |
500 | static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) |
501 | { |
502 | struct fib6_nh *fib6_nh; |
503 | |
504 | fib6_nh = f6i->nh ? nexthop_fib6_nh(nh: f6i->nh) : f6i->fib6_nh; |
505 | return fib6_nh->fib_nh_dev; |
506 | } |
507 | |
508 | static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) |
509 | { |
510 | struct nexthop *nh = res->f6i->nh; |
511 | struct nh_info *nhi; |
512 | |
513 | nh = nexthop_select_path(nh, hash); |
514 | |
515 | nhi = rcu_dereference_rtnl(nh->nh_info); |
516 | if (nhi->reject_nh) { |
517 | res->fib6_type = RTN_BLACKHOLE; |
518 | res->fib6_flags |= RTF_REJECT; |
519 | res->nh = nexthop_fib6_nh(nh); |
520 | } else { |
521 | res->nh = &nhi->fib6_nh; |
522 | } |
523 | } |
524 | |
/*
 * Declaration only; defined outside this header. @cb receives a fib6_nh
 * together with the opaque @arg.
 */
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);
528 | |
529 | static inline int nexthop_get_family(struct nexthop *nh) |
530 | { |
531 | struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); |
532 | |
533 | return nhi->family; |
534 | } |
535 | |
536 | static inline |
537 | struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) |
538 | { |
539 | struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); |
540 | |
541 | return &nhi->fib_nhc; |
542 | } |
543 | |
544 | static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, |
545 | int hash) |
546 | { |
547 | struct nh_info *nhi; |
548 | struct nexthop *nhp; |
549 | |
550 | nhp = nexthop_select_path(nh, hash); |
551 | if (unlikely(!nhp)) |
552 | return NULL; |
553 | nhi = rcu_dereference(nhp->nh_info); |
554 | return &nhi->fib_nhc; |
555 | } |
556 | #endif |
557 | |