1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * net/dst.h Protocol independent destination cache definitions. |
4 | * |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | * |
7 | */ |
8 | |
9 | #ifndef _NET_DST_H |
10 | #define _NET_DST_H |
11 | |
12 | #include <net/dst_ops.h> |
13 | #include <linux/netdevice.h> |
14 | #include <linux/rtnetlink.h> |
15 | #include <linux/rcupdate.h> |
16 | #include <linux/bug.h> |
17 | #include <linux/jiffies.h> |
18 | #include <linux/refcount.h> |
19 | #include <linux/rcuref.h> |
20 | #include <net/neighbour.h> |
21 | #include <asm/processor.h> |
22 | #include <linux/indirect_call_wrapper.h> |
23 | |
24 | struct sk_buff; |
25 | |
26 | struct dst_entry { |
27 | struct net_device *dev; |
28 | struct dst_ops *ops; |
29 | unsigned long _metrics; |
30 | unsigned long expires; |
31 | #ifdef CONFIG_XFRM |
32 | struct xfrm_state *xfrm; |
33 | #else |
34 | void *__pad1; |
35 | #endif |
36 | int (*input)(struct sk_buff *); |
37 | int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); |
38 | |
39 | unsigned short flags; |
40 | #define DST_NOXFRM 0x0002 |
41 | #define DST_NOPOLICY 0x0004 |
42 | #define DST_NOCOUNT 0x0008 |
43 | #define DST_FAKE_RTABLE 0x0010 |
44 | #define DST_XFRM_TUNNEL 0x0020 |
45 | #define DST_XFRM_QUEUE 0x0040 |
46 | #define DST_METADATA 0x0080 |
47 | |
48 | /* A non-zero value of dst->obsolete forces by-hand validation |
49 | * of the route entry. Positive values are set by the generic |
50 | * dst layer to indicate that the entry has been forcefully |
51 | * destroyed. |
52 | * |
53 | * Negative values are used by the implementation layer code to |
54 | * force invocation of the dst_ops->check() method. |
55 | */ |
56 | short obsolete; |
57 | #define DST_OBSOLETE_NONE 0 |
58 | #define DST_OBSOLETE_DEAD 2 |
59 | #define DST_OBSOLETE_FORCE_CHK -1 |
60 | #define DST_OBSOLETE_KILL -2 |
61 | unsigned short ; /* more space at head required */ |
62 | unsigned short trailer_len; /* space to reserve at tail */ |
63 | |
64 | /* |
65 | * __rcuref wants to be on a different cache line from |
66 | * input/output/ops or performance tanks badly |
67 | */ |
68 | #ifdef CONFIG_64BIT |
69 | rcuref_t __rcuref; /* 64-bit offset 64 */ |
70 | #endif |
71 | int __use; |
72 | unsigned long lastuse; |
73 | struct rcu_head rcu_head; |
74 | short error; |
75 | short __pad; |
76 | __u32 tclassid; |
77 | #ifndef CONFIG_64BIT |
78 | struct lwtunnel_state *lwtstate; |
79 | rcuref_t __rcuref; /* 32-bit offset 64 */ |
80 | #endif |
81 | netdevice_tracker dev_tracker; |
82 | |
83 | /* |
84 | * Used by rtable and rt6_info. Moves lwtstate into the next cache |
85 | * line on 64bit so that lwtstate does not cause false sharing with |
86 | * __rcuref under contention of __rcuref. This also puts the |
87 | * frequently accessed members of rtable and rt6_info out of the |
88 | * __rcuref cache line. |
89 | */ |
90 | struct list_head rt_uncached; |
91 | struct uncached_list *rt_uncached_list; |
92 | #ifdef CONFIG_64BIT |
93 | struct lwtunnel_state *lwtstate; |
94 | #endif |
95 | }; |
96 | |
/*
 * Metrics array plus a reference count.  The 4-byte alignment keeps the
 * two low bits of any pointer to this structure clear, so that they can
 * carry the DST_METRICS_FLAGS bits when the pointer is stored in
 * dst_entry::_metrics.
 */
struct dst_metrics {
	u32		metrics[RTAX_MAX];
	refcount_t	refcnt;
} __aligned(4);		/* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;
102 | |
/* Defined outside this header; presumably a generic ->cow_metrics() helper. */
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);

/* Flag bits stored in the low bits of dst_entry::_metrics. */
#define DST_METRICS_READ_ONLY		0x1UL
#define DST_METRICS_REFCOUNTED		0x2UL
#define DST_METRICS_FLAGS		0x3UL
/* Strip the flag bits from a raw _metrics word and yield the u32 array. */
#define __DST_METRICS_PTR(Y)	\
	((u32 *)((Y) & ~DST_METRICS_FLAGS))
/* Same as above, starting from a dst_entry pointer. */
#define DST_METRICS_PTR(X)	__DST_METRICS_PTR((X)->_metrics)
111 | |
112 | static inline bool dst_metrics_read_only(const struct dst_entry *dst) |
113 | { |
114 | return dst->_metrics & DST_METRICS_READ_ONLY; |
115 | } |
116 | |
117 | void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); |
118 | |
119 | static inline void dst_destroy_metrics_generic(struct dst_entry *dst) |
120 | { |
121 | unsigned long val = dst->_metrics; |
122 | if (!(val & DST_METRICS_READ_ONLY)) |
123 | __dst_destroy_metrics_generic(dst, old: val); |
124 | } |
125 | |
126 | static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst) |
127 | { |
128 | unsigned long p = dst->_metrics; |
129 | |
130 | BUG_ON(!p); |
131 | |
132 | if (p & DST_METRICS_READ_ONLY) |
133 | return dst->ops->cow_metrics(dst, p); |
134 | return __DST_METRICS_PTR(p); |
135 | } |
136 | |
137 | /* This may only be invoked before the entry has reached global |
138 | * visibility. |
139 | */ |
140 | static inline void dst_init_metrics(struct dst_entry *dst, |
141 | const u32 *src_metrics, |
142 | bool read_only) |
143 | { |
144 | dst->_metrics = ((unsigned long) src_metrics) | |
145 | (read_only ? DST_METRICS_READ_ONLY : 0); |
146 | } |
147 | |
148 | static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) |
149 | { |
150 | u32 *dst_metrics = dst_metrics_write_ptr(dst: dest); |
151 | |
152 | if (dst_metrics) { |
153 | u32 *src_metrics = DST_METRICS_PTR(src); |
154 | |
155 | memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32)); |
156 | } |
157 | } |
158 | |
159 | static inline u32 *dst_metrics_ptr(struct dst_entry *dst) |
160 | { |
161 | return DST_METRICS_PTR(dst); |
162 | } |
163 | |
164 | static inline u32 |
165 | dst_metric_raw(const struct dst_entry *dst, const int metric) |
166 | { |
167 | u32 *p = DST_METRICS_PTR(dst); |
168 | |
169 | return p[metric-1]; |
170 | } |
171 | |
172 | static inline u32 |
173 | dst_metric(const struct dst_entry *dst, const int metric) |
174 | { |
175 | WARN_ON_ONCE(metric == RTAX_HOPLIMIT || |
176 | metric == RTAX_ADVMSS || |
177 | metric == RTAX_MTU); |
178 | return dst_metric_raw(dst, metric); |
179 | } |
180 | |
181 | static inline u32 |
182 | dst_metric_advmss(const struct dst_entry *dst) |
183 | { |
184 | u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS); |
185 | |
186 | if (!advmss) |
187 | advmss = dst->ops->default_advmss(dst); |
188 | |
189 | return advmss; |
190 | } |
191 | |
192 | static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) |
193 | { |
194 | u32 *p = dst_metrics_write_ptr(dst); |
195 | |
196 | if (p) |
197 | p[metric-1] = val; |
198 | } |
199 | |
/* Kernel-internal feature bits that are unallocated in user space. */
#define DST_FEATURE_ECN_CA	(1U << 31)

/* All kernel-internal feature bits. */
#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
/* ECN-related feature bits: the internal one plus RTAX_FEATURE_ECN. */
#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)
205 | |
206 | static inline u32 |
207 | dst_feature(const struct dst_entry *dst, u32 feature) |
208 | { |
209 | return dst_metric(dst, RTAX_FEATURES) & feature; |
210 | } |
211 | |
212 | INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *)); |
213 | INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *)); |
214 | static inline u32 dst_mtu(const struct dst_entry *dst) |
215 | { |
216 | return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); |
217 | } |
218 | |
/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
	return msecs_to_jiffies(dst_metric(dst, metric));
}
224 | |
225 | static inline int |
226 | dst_metric_locked(const struct dst_entry *dst, int metric) |
227 | { |
228 | return dst_metric(dst, RTAX_LOCK) & (1 << metric); |
229 | } |
230 | |
/*
 * Take a reference on @dst through rcuref_get(), and warn if
 * rcuref_get() reports failure.
 */
static inline void dst_hold(struct dst_entry *dst)
{
	/*
	 * If your kernel compilation stops here, please check
	 * the placement of __rcuref in struct dst_entry
	 */
	BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
	WARN_ON(!rcuref_get(&dst->__rcuref));
}
240 | |
241 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) |
242 | { |
243 | if (unlikely(time != dst->lastuse)) { |
244 | dst->__use++; |
245 | dst->lastuse = time; |
246 | } |
247 | } |
248 | |
/*
 * NULL-tolerant variant of dst_hold(): takes a reference when @dst is
 * non-NULL and returns @dst unchanged either way.
 */
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
	if (!dst)
		return NULL;

	dst_hold(dst);
	return dst;
}
255 | |
256 | void dst_release(struct dst_entry *dst); |
257 | |
258 | void dst_release_immediate(struct dst_entry *dst); |
259 | |
260 | static inline void refdst_drop(unsigned long refdst) |
261 | { |
262 | if (!(refdst & SKB_DST_NOREF)) |
263 | dst_release(dst: (struct dst_entry *)(refdst & SKB_DST_PTRMASK)); |
264 | } |
265 | |
266 | /** |
267 | * skb_dst_drop - drops skb dst |
268 | * @skb: buffer |
269 | * |
270 | * Drops dst reference count if a reference was taken. |
271 | */ |
272 | static inline void skb_dst_drop(struct sk_buff *skb) |
273 | { |
274 | if (skb->_skb_refdst) { |
275 | refdst_drop(refdst: skb->_skb_refdst); |
276 | skb->_skb_refdst = 0UL; |
277 | } |
278 | } |
279 | |
280 | static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst) |
281 | { |
282 | nskb->slow_gro |= !!refdst; |
283 | nskb->_skb_refdst = refdst; |
284 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) |
285 | dst_clone(dst: skb_dst(skb: nskb)); |
286 | } |
287 | |
288 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) |
289 | { |
290 | __skb_dst_copy(nskb, refdst: oskb->_skb_refdst); |
291 | } |
292 | |
293 | /** |
294 | * dst_hold_safe - Take a reference on a dst if possible |
295 | * @dst: pointer to dst entry |
296 | * |
297 | * This helper returns false if it could not safely |
298 | * take a reference on a dst. |
299 | */ |
300 | static inline bool dst_hold_safe(struct dst_entry *dst) |
301 | { |
302 | return rcuref_get(ref: &dst->__rcuref); |
303 | } |
304 | |
305 | /** |
306 | * skb_dst_force - makes sure skb dst is refcounted |
307 | * @skb: buffer |
308 | * |
309 | * If dst is not yet refcounted and not destroyed, grab a ref on it. |
310 | * Returns true if dst is refcounted. |
311 | */ |
312 | static inline bool skb_dst_force(struct sk_buff *skb) |
313 | { |
314 | if (skb_dst_is_noref(skb)) { |
315 | struct dst_entry *dst = skb_dst(skb); |
316 | |
317 | WARN_ON(!rcu_read_lock_held()); |
318 | if (!dst_hold_safe(dst)) |
319 | dst = NULL; |
320 | |
321 | skb->_skb_refdst = (unsigned long)dst; |
322 | skb->slow_gro |= !!dst; |
323 | } |
324 | |
325 | return skb->_skb_refdst != 0UL; |
326 | } |
327 | |
328 | |
329 | /** |
330 | * __skb_tunnel_rx - prepare skb for rx reinsert |
331 | * @skb: buffer |
332 | * @dev: tunnel device |
333 | * @net: netns for packet i/o |
334 | * |
335 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, |
336 | * so make some cleanups. (no accounting done) |
337 | */ |
338 | static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, |
339 | struct net *net) |
340 | { |
341 | skb->dev = dev; |
342 | |
343 | /* |
344 | * Clear hash so that we can recalulate the hash for the |
345 | * encapsulated packet, unless we have already determine the hash |
346 | * over the L4 4-tuple. |
347 | */ |
348 | skb_clear_hash_if_not_l4(skb); |
349 | skb_set_queue_mapping(skb, queue_mapping: 0); |
350 | skb_scrub_packet(skb, xnet: !net_eq(net1: net, net2: dev_net(dev))); |
351 | } |
352 | |
353 | /** |
354 | * skb_tunnel_rx - prepare skb for rx reinsert |
355 | * @skb: buffer |
356 | * @dev: tunnel device |
357 | * @net: netns for packet i/o |
358 | * |
359 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, |
360 | * so make some cleanups, and perform accounting. |
361 | * Note: this accounting is not SMP safe. |
362 | */ |
363 | static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, |
364 | struct net *net) |
365 | { |
366 | DEV_STATS_INC(dev, rx_packets); |
367 | DEV_STATS_ADD(dev, rx_bytes, skb->len); |
368 | __skb_tunnel_rx(skb, dev, net); |
369 | } |
370 | |
371 | static inline u32 dst_tclassid(const struct sk_buff *skb) |
372 | { |
373 | #ifdef CONFIG_IP_ROUTE_CLASSID |
374 | const struct dst_entry *dst; |
375 | |
376 | dst = skb_dst(skb); |
377 | if (dst) |
378 | return dst->tclassid; |
379 | #endif |
380 | return 0; |
381 | } |
382 | |
383 | int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); |
384 | static inline int dst_discard(struct sk_buff *skb) |
385 | { |
386 | return dst_discard_out(net: &init_net, sk: skb->sk, skb); |
387 | } |
/*
 * Allocation, initialisation and device teardown helpers for dst
 * entries; all three are defined outside this header.
 */
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
		int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_obsolete,
	      unsigned short flags);
void dst_dev_put(struct dst_entry *dst);
394 | |
/* Intentionally empty: calling this has no effect on @dst. */
static inline void dst_confirm(struct dst_entry *dst)
{
}
398 | |
399 | static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) |
400 | { |
401 | struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); |
402 | return IS_ERR(ptr: n) ? NULL : n; |
403 | } |
404 | |
405 | static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, |
406 | struct sk_buff *skb) |
407 | { |
408 | struct neighbour *n; |
409 | |
410 | if (WARN_ON_ONCE(!dst->ops->neigh_lookup)) |
411 | return NULL; |
412 | |
413 | n = dst->ops->neigh_lookup(dst, skb, NULL); |
414 | |
415 | return IS_ERR(ptr: n) ? NULL : n; |
416 | } |
417 | |
418 | static inline void dst_confirm_neigh(const struct dst_entry *dst, |
419 | const void *daddr) |
420 | { |
421 | if (dst->ops->confirm_neigh) |
422 | dst->ops->confirm_neigh(dst, daddr); |
423 | } |
424 | |
425 | static inline void dst_link_failure(struct sk_buff *skb) |
426 | { |
427 | struct dst_entry *dst = skb_dst(skb); |
428 | if (dst && dst->ops && dst->ops->link_failure) |
429 | dst->ops->link_failure(skb); |
430 | } |
431 | |
432 | static inline void dst_set_expires(struct dst_entry *dst, int timeout) |
433 | { |
434 | unsigned long expires = jiffies + timeout; |
435 | |
436 | if (expires == 0) |
437 | expires = 1; |
438 | |
439 | if (dst->expires == 0 || time_before(expires, dst->expires)) |
440 | dst->expires = expires; |
441 | } |
442 | |
443 | INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, |
444 | struct sk_buff *)); |
445 | INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, |
446 | struct sk_buff *)); |
447 | /* Output packet to network from transport. */ |
448 | static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) |
449 | { |
450 | return INDIRECT_CALL_INET(skb_dst(skb)->output, |
451 | ip6_output, ip_output, |
452 | net, sk, skb); |
453 | } |
454 | |
455 | INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *)); |
456 | INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *)); |
457 | /* Input packet from network to transport. */ |
458 | static inline int dst_input(struct sk_buff *skb) |
459 | { |
460 | return INDIRECT_CALL_INET(skb_dst(skb)->input, |
461 | ip6_input, ip_local_deliver, skb); |
462 | } |
463 | |
464 | INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, |
465 | u32)); |
466 | INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, |
467 | u32)); |
468 | static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) |
469 | { |
470 | if (dst->obsolete) |
471 | dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, |
472 | ipv4_dst_check, dst, cookie); |
473 | return dst; |
474 | } |
475 | |
/* Flags for xfrm_lookup flags argument; each value is a distinct bit. */
enum {
	XFRM_LOOKUP_ICMP = 1 << 0,
	XFRM_LOOKUP_QUEUE = 1 << 1,
	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
};
482 | |
483 | struct flowi; |
484 | #ifndef CONFIG_XFRM |
/*
 * Stub used when CONFIG_XFRM is disabled: no transformation is applied
 * and @dst_orig is handed back untouched; all other arguments are
 * ignored.
 */
static inline struct dst_entry *
xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
	    const struct flowi *fl, const struct sock *sk, int flags)
{
	return dst_orig;
}
493 | |
494 | static inline struct dst_entry * |
495 | xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, |
496 | const struct flowi *fl, const struct sock *sk, |
497 | int flags, u32 if_id) |
498 | { |
499 | return dst_orig; |
500 | } |
501 | |
/*
 * Stub used when CONFIG_XFRM is disabled: @dst_orig is handed back
 * untouched; all other arguments are ignored.
 */
static inline struct dst_entry *
xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
		  const struct flowi *fl, const struct sock *sk, int flags)
{
	return dst_orig;
}
510 | |
/* Stub used when CONFIG_XFRM is disabled: there is never an xfrm state. */
static inline struct xfrm_state *
dst_xfrm(const struct dst_entry *dst)
{
	return NULL;
}
515 | |
516 | #else |
/*
 * Lookup entry points used when CONFIG_XFRM is enabled; they are only
 * declared here and defined outside this header.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl, const struct sock *sk,
			      int flags);

struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
					struct dst_entry *dst_orig,
					const struct flowi *fl,
					const struct sock *sk, int flags,
					u32 if_id);

struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl, const struct sock *sk,
				    int flags);
530 | |
531 | /* skb attached with this dst needs transformation if dst->xfrm is valid */ |
532 | static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) |
533 | { |
534 | return dst->xfrm; |
535 | } |
536 | #endif |
537 | |
538 | static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) |
539 | { |
540 | struct dst_entry *dst = skb_dst(skb); |
541 | |
542 | if (dst && dst->ops->update_pmtu) |
543 | dst->ops->update_pmtu(dst, NULL, skb, mtu, true); |
544 | } |
545 | |
546 | /* update dst pmtu but not do neighbor confirm */ |
547 | static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) |
548 | { |
549 | struct dst_entry *dst = skb_dst(skb); |
550 | |
551 | if (dst && dst->ops->update_pmtu) |
552 | dst->ops->update_pmtu(dst, NULL, skb, mtu, false); |
553 | } |
554 | |
/*
 * "Blackhole" helpers, defined outside this header.  Judging by their
 * signatures they are meant to be plugged into the matching dst_ops
 * callbacks (check, update_pmtu, redirect, cow_metrics, neigh_lookup,
 * mtu) — confirm against the definitions.
 */
struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu, bool confirm_neigh);
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
					     struct sk_buff *skb,
					     const void *daddr);
unsigned int dst_blackhole_mtu(const struct dst_entry *dst);
565 | |
566 | #endif /* _NET_DST_H */ |
567 | |