1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * net/dst.h Protocol independent destination cache definitions. |
4 | * |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | * |
7 | */ |
8 | |
9 | #ifndef _NET_DST_H |
10 | #define _NET_DST_H |
11 | |
12 | #include <net/dst_ops.h> |
13 | #include <linux/netdevice.h> |
14 | #include <linux/rtnetlink.h> |
15 | #include <linux/rcupdate.h> |
16 | #include <linux/bug.h> |
17 | #include <linux/jiffies.h> |
18 | #include <linux/refcount.h> |
19 | #include <net/neighbour.h> |
20 | #include <asm/processor.h> |
21 | #include <linux/indirect_call_wrapper.h> |
22 | |
23 | struct sk_buff; |
24 | |
25 | struct dst_entry { |
26 | struct net_device *dev; |
27 | struct dst_ops *ops; |
28 | unsigned long _metrics; |
29 | unsigned long expires; |
30 | #ifdef CONFIG_XFRM |
31 | struct xfrm_state *xfrm; |
32 | #else |
33 | void *__pad1; |
34 | #endif |
35 | int (*input)(struct sk_buff *); |
36 | int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); |
37 | |
38 | unsigned short flags; |
39 | #define DST_NOXFRM 0x0002 |
40 | #define DST_NOPOLICY 0x0004 |
41 | #define DST_NOCOUNT 0x0008 |
42 | #define DST_FAKE_RTABLE 0x0010 |
43 | #define DST_XFRM_TUNNEL 0x0020 |
44 | #define DST_XFRM_QUEUE 0x0040 |
45 | #define DST_METADATA 0x0080 |
46 | |
47 | /* A non-zero value of dst->obsolete forces by-hand validation |
48 | * of the route entry. Positive values are set by the generic |
49 | * dst layer to indicate that the entry has been forcefully |
50 | * destroyed. |
51 | * |
52 | * Negative values are used by the implementation layer code to |
53 | * force invocation of the dst_ops->check() method. |
54 | */ |
55 | short obsolete; |
56 | #define DST_OBSOLETE_NONE 0 |
57 | #define DST_OBSOLETE_DEAD 2 |
58 | #define DST_OBSOLETE_FORCE_CHK -1 |
59 | #define DST_OBSOLETE_KILL -2 |
60 | unsigned short ; /* more space at head required */ |
61 | unsigned short trailer_len; /* space to reserve at tail */ |
62 | |
63 | /* |
64 | * __refcnt wants to be on a different cache line from |
65 | * input/output/ops or performance tanks badly |
66 | */ |
67 | #ifdef CONFIG_64BIT |
68 | atomic_t __refcnt; /* 64-bit offset 64 */ |
69 | #endif |
70 | int __use; |
71 | unsigned long lastuse; |
72 | struct lwtunnel_state *lwtstate; |
73 | struct rcu_head rcu_head; |
74 | short error; |
75 | short __pad; |
76 | __u32 tclassid; |
77 | #ifndef CONFIG_64BIT |
78 | atomic_t __refcnt; /* 32-bit offset 64 */ |
79 | #endif |
80 | netdevice_tracker dev_tracker; |
81 | }; |
82 | |
83 | struct dst_metrics { |
84 | u32 metrics[RTAX_MAX]; |
85 | refcount_t refcnt; |
86 | } __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */ |
/* Shared metrics object defined elsewhere; it is const and must not be written through. */
extern const struct dst_metrics dst_default_metrics;

/*
 * Out-of-line helper with the same (dst, old) -> u32 * shape as the
 * dst->ops->cow_metrics() callback invoked by dst_metrics_write_ptr().
 * NOTE(review): presumably returns a writable copy of the metrics that the
 * raw _metrics word @old refers to - confirm against the definition.
 */
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
90 | |
/* Flag bits stored in the low bits of dst_entry::_metrics. */
#define DST_METRICS_READ_ONLY		0x1UL
#define DST_METRICS_REFCOUNTED		0x2UL
#define DST_METRICS_FLAGS		(DST_METRICS_READ_ONLY | DST_METRICS_REFCOUNTED)

/* Mask the flag bits out of a raw _metrics word, leaving the u32 array pointer. */
#define __DST_METRICS_PTR(Y)		((u32 *)((Y) & ~DST_METRICS_FLAGS))
/* Same as above, starting from a pointer to the dst entry. */
#define DST_METRICS_PTR(X)		__DST_METRICS_PTR((X)->_metrics)
97 | |
98 | static inline bool dst_metrics_read_only(const struct dst_entry *dst) |
99 | { |
100 | return dst->_metrics & DST_METRICS_READ_ONLY; |
101 | } |
102 | |
103 | void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); |
104 | |
105 | static inline void dst_destroy_metrics_generic(struct dst_entry *dst) |
106 | { |
107 | unsigned long val = dst->_metrics; |
108 | if (!(val & DST_METRICS_READ_ONLY)) |
109 | __dst_destroy_metrics_generic(dst, val); |
110 | } |
111 | |
112 | static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst) |
113 | { |
114 | unsigned long p = dst->_metrics; |
115 | |
116 | BUG_ON(!p); |
117 | |
118 | if (p & DST_METRICS_READ_ONLY) |
119 | return dst->ops->cow_metrics(dst, p); |
120 | return __DST_METRICS_PTR(p); |
121 | } |
122 | |
123 | /* This may only be invoked before the entry has reached global |
124 | * visibility. |
125 | */ |
126 | static inline void dst_init_metrics(struct dst_entry *dst, |
127 | const u32 *src_metrics, |
128 | bool read_only) |
129 | { |
130 | dst->_metrics = ((unsigned long) src_metrics) | |
131 | (read_only ? DST_METRICS_READ_ONLY : 0); |
132 | } |
133 | |
134 | static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) |
135 | { |
136 | u32 *dst_metrics = dst_metrics_write_ptr(dest); |
137 | |
138 | if (dst_metrics) { |
139 | u32 *src_metrics = DST_METRICS_PTR(src); |
140 | |
141 | memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32)); |
142 | } |
143 | } |
144 | |
145 | static inline u32 *dst_metrics_ptr(struct dst_entry *dst) |
146 | { |
147 | return DST_METRICS_PTR(dst); |
148 | } |
149 | |
150 | static inline u32 |
151 | dst_metric_raw(const struct dst_entry *dst, const int metric) |
152 | { |
153 | u32 *p = DST_METRICS_PTR(dst); |
154 | |
155 | return p[metric-1]; |
156 | } |
157 | |
158 | static inline u32 |
159 | dst_metric(const struct dst_entry *dst, const int metric) |
160 | { |
161 | WARN_ON_ONCE(metric == RTAX_HOPLIMIT || |
162 | metric == RTAX_ADVMSS || |
163 | metric == RTAX_MTU); |
164 | return dst_metric_raw(dst, metric); |
165 | } |
166 | |
167 | static inline u32 |
168 | dst_metric_advmss(const struct dst_entry *dst) |
169 | { |
170 | u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS); |
171 | |
172 | if (!advmss) |
173 | advmss = dst->ops->default_advmss(dst); |
174 | |
175 | return advmss; |
176 | } |
177 | |
178 | static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) |
179 | { |
180 | u32 *p = dst_metrics_write_ptr(dst); |
181 | |
182 | if (p) |
183 | p[metric-1] = val; |
184 | } |
185 | |
/* Feature bits used only inside the kernel; user space has no allocation for them. */
#define DST_FEATURE_ECN_CA	(1U << 31)

#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
#define DST_FEATURE_ECN_MASK	(RTAX_FEATURE_ECN | DST_FEATURE_ECN_CA)
191 | |
192 | static inline u32 |
193 | dst_feature(const struct dst_entry *dst, u32 feature) |
194 | { |
195 | return dst_metric(dst, RTAX_FEATURES) & feature; |
196 | } |
197 | |
198 | INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *)); |
199 | INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *)); |
200 | static inline u32 dst_mtu(const struct dst_entry *dst) |
201 | { |
202 | return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); |
203 | } |
204 | |
205 | /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ |
206 | static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) |
207 | { |
208 | return msecs_to_jiffies(dst_metric(dst, metric)); |
209 | } |
210 | |
211 | static inline u32 |
212 | dst_allfrag(const struct dst_entry *dst) |
213 | { |
214 | int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); |
215 | return ret; |
216 | } |
217 | |
218 | static inline int |
219 | dst_metric_locked(const struct dst_entry *dst, int metric) |
220 | { |
221 | return dst_metric(dst, RTAX_LOCK) & (1 << metric); |
222 | } |
223 | |
224 | static inline void dst_hold(struct dst_entry *dst) |
225 | { |
226 | /* |
227 | * If your kernel compilation stops here, please check |
228 | * the placement of __refcnt in struct dst_entry |
229 | */ |
230 | BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); |
231 | WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); |
232 | } |
233 | |
234 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) |
235 | { |
236 | if (unlikely(time != dst->lastuse)) { |
237 | dst->__use++; |
238 | dst->lastuse = time; |
239 | } |
240 | } |
241 | |
/* Take a reference on @dst, then account a use of it at @time. */
static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time)
{
	dst_hold(dst);		/* reference first ... */
	dst_use_noref(dst, time);	/* ... then the use accounting */
}
247 | |
/*
 * NULL-tolerant dst_hold(): take a reference when @dst is non-NULL.
 * Returns @dst unchanged either way.
 */
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
	if (!dst)
		return NULL;

	dst_hold(dst);
	return dst;
}
254 | |
/*
 * Called by refdst_drop() for skb dsts that were attached with a
 * reference taken.
 * NOTE(review): presumably the counterpart of dst_hold(); defined elsewhere.
 */
void dst_release(struct dst_entry *dst);

/*
 * NOTE(review): presumably like dst_release() but without deferring the
 * destruction of the entry - confirm against the definition.
 */
void dst_release_immediate(struct dst_entry *dst);
258 | |
259 | static inline void refdst_drop(unsigned long refdst) |
260 | { |
261 | if (!(refdst & SKB_DST_NOREF)) |
262 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); |
263 | } |
264 | |
265 | /** |
266 | * skb_dst_drop - drops skb dst |
267 | * @skb: buffer |
268 | * |
269 | * Drops dst reference count if a reference was taken. |
270 | */ |
271 | static inline void skb_dst_drop(struct sk_buff *skb) |
272 | { |
273 | if (skb->_skb_refdst) { |
274 | refdst_drop(skb->_skb_refdst); |
275 | skb->_skb_refdst = 0UL; |
276 | } |
277 | } |
278 | |
279 | static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst) |
280 | { |
281 | nskb->slow_gro |= !!refdst; |
282 | nskb->_skb_refdst = refdst; |
283 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) |
284 | dst_clone(skb_dst(nskb)); |
285 | } |
286 | |
287 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) |
288 | { |
289 | __skb_dst_copy(nskb, oskb->_skb_refdst); |
290 | } |
291 | |
292 | /** |
293 | * dst_hold_safe - Take a reference on a dst if possible |
294 | * @dst: pointer to dst entry |
295 | * |
296 | * This helper returns false if it could not safely |
297 | * take a reference on a dst. |
298 | */ |
299 | static inline bool dst_hold_safe(struct dst_entry *dst) |
300 | { |
301 | return atomic_inc_not_zero(&dst->__refcnt); |
302 | } |
303 | |
304 | /** |
305 | * skb_dst_force - makes sure skb dst is refcounted |
306 | * @skb: buffer |
307 | * |
308 | * If dst is not yet refcounted and not destroyed, grab a ref on it. |
309 | * Returns true if dst is refcounted. |
310 | */ |
311 | static inline bool skb_dst_force(struct sk_buff *skb) |
312 | { |
313 | if (skb_dst_is_noref(skb)) { |
314 | struct dst_entry *dst = skb_dst(skb); |
315 | |
316 | WARN_ON(!rcu_read_lock_held()); |
317 | if (!dst_hold_safe(dst)) |
318 | dst = NULL; |
319 | |
320 | skb->_skb_refdst = (unsigned long)dst; |
321 | skb->slow_gro |= !!dst; |
322 | } |
323 | |
324 | return skb->_skb_refdst != 0UL; |
325 | } |
326 | |
327 | |
328 | /** |
329 | * __skb_tunnel_rx - prepare skb for rx reinsert |
330 | * @skb: buffer |
331 | * @dev: tunnel device |
332 | * @net: netns for packet i/o |
333 | * |
334 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, |
335 | * so make some cleanups. (no accounting done) |
336 | */ |
337 | static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, |
338 | struct net *net) |
339 | { |
340 | skb->dev = dev; |
341 | |
342 | /* |
343 | * Clear hash so that we can recalulate the hash for the |
344 | * encapsulated packet, unless we have already determine the hash |
345 | * over the L4 4-tuple. |
346 | */ |
347 | skb_clear_hash_if_not_l4(skb); |
348 | skb_set_queue_mapping(skb, 0); |
349 | skb_scrub_packet(skb, !net_eq(net, dev_net(dev))); |
350 | } |
351 | |
352 | /** |
353 | * skb_tunnel_rx - prepare skb for rx reinsert |
354 | * @skb: buffer |
355 | * @dev: tunnel device |
356 | * @net: netns for packet i/o |
357 | * |
358 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, |
359 | * so make some cleanups, and perform accounting. |
360 | * Note: this accounting is not SMP safe. |
361 | */ |
362 | static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, |
363 | struct net *net) |
364 | { |
365 | /* TODO : stats should be SMP safe */ |
366 | dev->stats.rx_packets++; |
367 | dev->stats.rx_bytes += skb->len; |
368 | __skb_tunnel_rx(skb, dev, net); |
369 | } |
370 | |
371 | static inline u32 dst_tclassid(const struct sk_buff *skb) |
372 | { |
373 | #ifdef CONFIG_IP_ROUTE_CLASSID |
374 | const struct dst_entry *dst; |
375 | |
376 | dst = skb_dst(skb); |
377 | if (dst) |
378 | return dst->tclassid; |
379 | #endif |
380 | return 0; |
381 | } |
382 | |
383 | int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); |
384 | static inline int dst_discard(struct sk_buff *skb) |
385 | { |
386 | return dst_discard_out(&init_net, skb->sk, skb); |
387 | } |
/*
 * Allocation, initialisation and teardown entry points, all defined
 * elsewhere.
 * NOTE(review): presumably @initial_ref and @initial_obsolete seed
 * dst->__refcnt and dst->obsolete, and @flags takes DST_* bits - confirm
 * against the definitions.
 */
void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
		int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_ref, int initial_obsolete,
	      unsigned short flags);
struct dst_entry *dst_destroy(struct dst_entry *dst);
void dst_dev_put(struct dst_entry *dst);
395 | |
/* No-op: the body is empty, so calling this has no effect on @dst. */
static inline void dst_confirm(struct dst_entry *dst)
{
	/* nothing to do */
}
399 | |
400 | static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) |
401 | { |
402 | struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); |
403 | return IS_ERR(n) ? NULL : n; |
404 | } |
405 | |
406 | static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, |
407 | struct sk_buff *skb) |
408 | { |
409 | struct neighbour *n; |
410 | |
411 | if (WARN_ON_ONCE(!dst->ops->neigh_lookup)) |
412 | return NULL; |
413 | |
414 | n = dst->ops->neigh_lookup(dst, skb, NULL); |
415 | |
416 | return IS_ERR(n) ? NULL : n; |
417 | } |
418 | |
419 | static inline void dst_confirm_neigh(const struct dst_entry *dst, |
420 | const void *daddr) |
421 | { |
422 | if (dst->ops->confirm_neigh) |
423 | dst->ops->confirm_neigh(dst, daddr); |
424 | } |
425 | |
426 | static inline void dst_link_failure(struct sk_buff *skb) |
427 | { |
428 | struct dst_entry *dst = skb_dst(skb); |
429 | if (dst && dst->ops && dst->ops->link_failure) |
430 | dst->ops->link_failure(skb); |
431 | } |
432 | |
433 | static inline void dst_set_expires(struct dst_entry *dst, int timeout) |
434 | { |
435 | unsigned long expires = jiffies + timeout; |
436 | |
437 | if (expires == 0) |
438 | expires = 1; |
439 | |
440 | if (dst->expires == 0 || time_before(expires, dst->expires)) |
441 | dst->expires = expires; |
442 | } |
443 | |
INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
					 struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
					struct sk_buff *));
/*
 * Output packet to network from transport.
 *
 * Calls the ->output handler of the dst attached to @skb with
 * (@net, @sk, @skb) and returns its result. ip6_output and ip_output are
 * handed to INDIRECT_CALL_INET() as the candidate targets of that call.
 * The skb_dst() result is dereferenced without a check, so @skb must have
 * a dst attached.
 */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(skb_dst(skb)->output,
				  ip6_output, ip_output,
				  net, sk, skb);
}
455 | |
INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
/*
 * Input packet from network to transport.
 *
 * Calls the ->input handler of the dst attached to @skb and returns its
 * result. ip6_input and ip_local_deliver are handed to
 * INDIRECT_CALL_INET() as the candidate targets of that call.
 * The skb_dst() result is dereferenced without a check, so @skb must have
 * a dst attached.
 */
static inline int dst_input(struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(skb_dst(skb)->input,
				  ip6_input, ip_local_deliver, skb);
}
464 | |
465 | INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, |
466 | u32)); |
467 | INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, |
468 | u32)); |
469 | static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) |
470 | { |
471 | if (dst->obsolete) |
472 | dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, |
473 | ipv4_dst_check, dst, cookie); |
474 | return dst; |
475 | } |
476 | |
/* Bit flags for the 'flags' argument of xfrm_lookup(). */
enum {
	XFRM_LOOKUP_ICMP		= 0x1,
	XFRM_LOOKUP_QUEUE		= 0x2,
	XFRM_LOOKUP_KEEP_DST_REF	= 0x4,
};
483 | |
484 | struct flowi; |
485 | #ifndef CONFIG_XFRM |
/* Stub used when CONFIG_XFRM is not set: hands @dst_orig straight back. */
static inline struct dst_entry *
xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
	    const struct flowi *fl, const struct sock *sk, int flags)
{
	return dst_orig;
}
494 | |
495 | static inline struct dst_entry * |
496 | xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, |
497 | const struct flowi *fl, const struct sock *sk, |
498 | int flags, u32 if_id) |
499 | { |
500 | return dst_orig; |
501 | } |
502 | |
/* Stub used when CONFIG_XFRM is not set: hands @dst_orig straight back. */
static inline struct dst_entry *
xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
		  const struct flowi *fl, const struct sock *sk, int flags)
{
	return dst_orig;
}
511 | |
/* Stub used when CONFIG_XFRM is not set: there is never an xfrm state. */
static inline struct xfrm_state *
dst_xfrm(const struct dst_entry *dst)
{
	return NULL;
}
516 | |
517 | #else |
/*
 * Out-of-line lookups, declared here for builds with CONFIG_XFRM set.
 * The stubs used without CONFIG_XFRM share these signatures and simply
 * return @dst_orig.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl, const struct sock *sk,
			      int flags);

struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
					struct dst_entry *dst_orig,
					const struct flowi *fl,
					const struct sock *sk, int flags,
					u32 if_id);

struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl, const struct sock *sk,
				    int flags);
531 | |
532 | /* skb attached with this dst needs transformation if dst->xfrm is valid */ |
533 | static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) |
534 | { |
535 | return dst->xfrm; |
536 | } |
537 | #endif |
538 | |
539 | static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) |
540 | { |
541 | struct dst_entry *dst = skb_dst(skb); |
542 | |
543 | if (dst && dst->ops->update_pmtu) |
544 | dst->ops->update_pmtu(dst, NULL, skb, mtu, true); |
545 | } |
546 | |
547 | /* update dst pmtu but not do neighbor confirm */ |
548 | static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) |
549 | { |
550 | struct dst_entry *dst = skb_dst(skb); |
551 | |
552 | if (dst && dst->ops->update_pmtu) |
553 | dst->ops->update_pmtu(dst, NULL, skb, mtu, false); |
554 | } |
555 | |
/*
 * dst_blackhole_*() helpers, defined elsewhere.
 * NOTE(review): the names and signatures suggest ready-made
 * implementations of the dst_ops callbacks (check, update_pmtu, redirect,
 * cow_metrics, neigh_lookup, mtu) for blackhole routes - confirm against
 * the definitions before relying on that.
 */
struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu, bool confirm_neigh);
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
					     struct sk_buff *skb,
					     const void *daddr);
unsigned int dst_blackhole_mtu(const struct dst_entry *dst);
566 | |
567 | #endif /* _NET_DST_H */ |
568 | |