1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * net/core/dst.c Protocol independent destination cache. |
4 | * |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | * |
7 | */ |
8 | |
9 | #include <linux/bitops.h> |
10 | #include <linux/errno.h> |
11 | #include <linux/init.h> |
12 | #include <linux/kernel.h> |
13 | #include <linux/workqueue.h> |
14 | #include <linux/mm.h> |
15 | #include <linux/module.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/netdevice.h> |
18 | #include <linux/skbuff.h> |
19 | #include <linux/string.h> |
20 | #include <linux/types.h> |
21 | #include <net/net_namespace.h> |
22 | #include <linux/sched.h> |
23 | #include <linux/prefetch.h> |
24 | #include <net/lwtunnel.h> |
25 | #include <net/xfrm.h> |
26 | |
27 | #include <net/dst.h> |
28 | #include <net/dst_metadata.h> |
29 | |
/*
 * Output handler that drops the packet: the skb is handed to kfree_skb()
 * and 0 is returned. @net and @sk are unused.
 */
int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL(dst_discard_out);
36 | |
37 | const struct dst_metrics dst_default_metrics = { |
38 | /* This initializer is needed to force linker to place this variable |
39 | * into const section. Otherwise it might end into bss section. |
40 | * We really want to avoid false sharing on this variable, and catch |
41 | * any writes on it. |
42 | */ |
43 | .refcnt = REFCOUNT_INIT(1), |
44 | }; |
45 | EXPORT_SYMBOL(dst_default_metrics); |
46 | |
47 | void dst_init(struct dst_entry *dst, struct dst_ops *ops, |
48 | struct net_device *dev, int initial_obsolete, |
49 | unsigned short flags) |
50 | { |
51 | dst->dev = dev; |
52 | netdev_hold(dev, tracker: &dst->dev_tracker, GFP_ATOMIC); |
53 | dst->ops = ops; |
54 | dst_init_metrics(dst, src_metrics: dst_default_metrics.metrics, read_only: true); |
55 | dst->expires = 0UL; |
56 | #ifdef CONFIG_XFRM |
57 | dst->xfrm = NULL; |
58 | #endif |
59 | dst->input = dst_discard; |
60 | dst->output = dst_discard_out; |
61 | dst->error = 0; |
62 | dst->obsolete = initial_obsolete; |
63 | dst->header_len = 0; |
64 | dst->trailer_len = 0; |
65 | #ifdef CONFIG_IP_ROUTE_CLASSID |
66 | dst->tclassid = 0; |
67 | #endif |
68 | dst->lwtstate = NULL; |
69 | rcuref_init(ref: &dst->__rcuref, cnt: 1); |
70 | INIT_LIST_HEAD(list: &dst->rt_uncached); |
71 | dst->__use = 0; |
72 | dst->lastuse = jiffies; |
73 | dst->flags = flags; |
74 | if (!(flags & DST_NOCOUNT)) |
75 | dst_entries_add(dst: ops, val: 1); |
76 | } |
77 | EXPORT_SYMBOL(dst_init); |
78 | |
79 | void *dst_alloc(struct dst_ops *ops, struct net_device *dev, |
80 | int initial_obsolete, unsigned short flags) |
81 | { |
82 | struct dst_entry *dst; |
83 | |
84 | if (ops->gc && |
85 | !(flags & DST_NOCOUNT) && |
86 | dst_entries_get_fast(dst: ops) > ops->gc_thresh) |
87 | ops->gc(ops); |
88 | |
89 | dst = kmem_cache_alloc(cachep: ops->kmem_cachep, GFP_ATOMIC); |
90 | if (!dst) |
91 | return NULL; |
92 | |
93 | dst_init(dst, ops, dev, initial_obsolete, flags); |
94 | |
95 | return dst; |
96 | } |
97 | EXPORT_SYMBOL(dst_alloc); |
98 | |
99 | static void dst_destroy(struct dst_entry *dst) |
100 | { |
101 | struct dst_entry *child = NULL; |
102 | |
103 | smp_rmb(); |
104 | |
105 | #ifdef CONFIG_XFRM |
106 | if (dst->xfrm) { |
107 | struct xfrm_dst *xdst = (struct xfrm_dst *) dst; |
108 | |
109 | child = xdst->child; |
110 | } |
111 | #endif |
112 | if (!(dst->flags & DST_NOCOUNT)) |
113 | dst_entries_add(dst: dst->ops, val: -1); |
114 | |
115 | if (dst->ops->destroy) |
116 | dst->ops->destroy(dst); |
117 | netdev_put(dev: dst->dev, tracker: &dst->dev_tracker); |
118 | |
119 | lwtstate_put(lws: dst->lwtstate); |
120 | |
121 | if (dst->flags & DST_METADATA) |
122 | metadata_dst_free((struct metadata_dst *)dst); |
123 | else |
124 | kmem_cache_free(s: dst->ops->kmem_cachep, objp: dst); |
125 | |
126 | dst = child; |
127 | if (dst) |
128 | dst_release_immediate(dst); |
129 | } |
130 | |
131 | static void dst_destroy_rcu(struct rcu_head *head) |
132 | { |
133 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); |
134 | |
135 | dst_destroy(dst); |
136 | } |
137 | |
138 | /* Operations to mark dst as DEAD and clean up the net device referenced |
139 | * by dst: |
140 | * 1. put the dst under blackhole interface and discard all tx/rx packets |
141 | * on this route. |
142 | * 2. release the net_device |
143 | * This function should be called when removing routes from the fib tree |
144 | * in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to |
145 | * make the next dst_ops->check() fail. |
146 | */ |
147 | void dst_dev_put(struct dst_entry *dst) |
148 | { |
149 | struct net_device *dev = dst->dev; |
150 | |
151 | dst->obsolete = DST_OBSOLETE_DEAD; |
152 | if (dst->ops->ifdown) |
153 | dst->ops->ifdown(dst, dev); |
154 | dst->input = dst_discard; |
155 | dst->output = dst_discard_out; |
156 | dst->dev = blackhole_netdev; |
157 | netdev_ref_replace(odev: dev, ndev: blackhole_netdev, tracker: &dst->dev_tracker, |
158 | GFP_ATOMIC); |
159 | } |
160 | EXPORT_SYMBOL(dst_dev_put); |
161 | |
162 | void dst_release(struct dst_entry *dst) |
163 | { |
164 | if (dst && rcuref_put(ref: &dst->__rcuref)) |
165 | call_rcu_hurry(head: &dst->rcu_head, func: dst_destroy_rcu); |
166 | } |
167 | EXPORT_SYMBOL(dst_release); |
168 | |
169 | void dst_release_immediate(struct dst_entry *dst) |
170 | { |
171 | if (dst && rcuref_put(ref: &dst->__rcuref)) |
172 | dst_destroy(dst); |
173 | } |
174 | EXPORT_SYMBOL(dst_release_immediate); |
175 | |
176 | u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) |
177 | { |
178 | struct dst_metrics *p = kmalloc(size: sizeof(*p), GFP_ATOMIC); |
179 | |
180 | if (p) { |
181 | struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); |
182 | unsigned long prev, new; |
183 | |
184 | refcount_set(r: &p->refcnt, n: 1); |
185 | memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); |
186 | |
187 | new = (unsigned long) p; |
188 | prev = cmpxchg(&dst->_metrics, old, new); |
189 | |
190 | if (prev != old) { |
191 | kfree(objp: p); |
192 | p = (struct dst_metrics *)__DST_METRICS_PTR(prev); |
193 | if (prev & DST_METRICS_READ_ONLY) |
194 | p = NULL; |
195 | } else if (prev & DST_METRICS_REFCOUNTED) { |
196 | if (refcount_dec_and_test(r: &old_p->refcnt)) |
197 | kfree(objp: old_p); |
198 | } |
199 | } |
200 | BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); |
201 | return (u32 *)p; |
202 | } |
203 | EXPORT_SYMBOL(dst_cow_metrics_generic); |
204 | |
205 | /* Caller asserts that dst_metrics_read_only(dst) is false. */ |
206 | void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) |
207 | { |
208 | unsigned long prev, new; |
209 | |
210 | new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; |
211 | prev = cmpxchg(&dst->_metrics, old, new); |
212 | if (prev == old) |
213 | kfree(__DST_METRICS_PTR(old)); |
214 | } |
215 | EXPORT_SYMBOL(__dst_destroy_metrics_generic); |
216 | |
217 | struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie) |
218 | { |
219 | return NULL; |
220 | } |
221 | |
222 | u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old) |
223 | { |
224 | return NULL; |
225 | } |
226 | |
227 | struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, |
228 | struct sk_buff *skb, |
229 | const void *daddr) |
230 | { |
231 | return NULL; |
232 | } |
233 | |
234 | void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, |
235 | struct sk_buff *skb, u32 mtu, |
236 | bool confirm_neigh) |
237 | { |
238 | } |
239 | EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu); |
240 | |
/* Blackhole ->redirect() handler: intentionally does nothing. */
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb)
{
	/* Redirects are ignored. */
}
EXPORT_SYMBOL_GPL(dst_blackhole_redirect);
246 | |
247 | unsigned int dst_blackhole_mtu(const struct dst_entry *dst) |
248 | { |
249 | unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); |
250 | |
251 | return mtu ? : dst->dev->mtu; |
252 | } |
253 | EXPORT_SYMBOL_GPL(dst_blackhole_mtu); |
254 | |
255 | static struct dst_ops dst_blackhole_ops = { |
256 | .family = AF_UNSPEC, |
257 | .neigh_lookup = dst_blackhole_neigh_lookup, |
258 | .check = dst_blackhole_check, |
259 | .cow_metrics = dst_blackhole_cow_metrics, |
260 | .update_pmtu = dst_blackhole_update_pmtu, |
261 | .redirect = dst_blackhole_redirect, |
262 | .mtu = dst_blackhole_mtu, |
263 | }; |
264 | |
265 | static void __metadata_dst_init(struct metadata_dst *md_dst, |
266 | enum metadata_type type, u8 optslen) |
267 | { |
268 | struct dst_entry *dst; |
269 | |
270 | dst = &md_dst->dst; |
271 | dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, |
272 | DST_METADATA | DST_NOCOUNT); |
273 | memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); |
274 | md_dst->type = type; |
275 | } |
276 | |
277 | struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, |
278 | gfp_t flags) |
279 | { |
280 | struct metadata_dst *md_dst; |
281 | |
282 | md_dst = kmalloc(size: sizeof(*md_dst) + optslen, flags); |
283 | if (!md_dst) |
284 | return NULL; |
285 | |
286 | __metadata_dst_init(md_dst, type, optslen); |
287 | |
288 | return md_dst; |
289 | } |
290 | EXPORT_SYMBOL_GPL(metadata_dst_alloc); |
291 | |
292 | void metadata_dst_free(struct metadata_dst *md_dst) |
293 | { |
294 | #ifdef CONFIG_DST_CACHE |
295 | if (md_dst->type == METADATA_IP_TUNNEL) |
296 | dst_cache_destroy(dst_cache: &md_dst->u.tun_info.dst_cache); |
297 | #endif |
298 | if (md_dst->type == METADATA_XFRM) |
299 | dst_release(md_dst->u.xfrm_info.dst_orig); |
300 | kfree(objp: md_dst); |
301 | } |
302 | EXPORT_SYMBOL_GPL(metadata_dst_free); |
303 | |
304 | struct metadata_dst __percpu * |
305 | metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) |
306 | { |
307 | int cpu; |
308 | struct metadata_dst __percpu *md_dst; |
309 | |
310 | md_dst = __alloc_percpu_gfp(size: sizeof(struct metadata_dst) + optslen, |
311 | align: __alignof__(struct metadata_dst), gfp: flags); |
312 | if (!md_dst) |
313 | return NULL; |
314 | |
315 | for_each_possible_cpu(cpu) |
316 | __metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen); |
317 | |
318 | return md_dst; |
319 | } |
320 | EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); |
321 | |
322 | void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) |
323 | { |
324 | int cpu; |
325 | |
326 | for_each_possible_cpu(cpu) { |
327 | struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); |
328 | |
329 | #ifdef CONFIG_DST_CACHE |
330 | if (one_md_dst->type == METADATA_IP_TUNNEL) |
331 | dst_cache_destroy(dst_cache: &one_md_dst->u.tun_info.dst_cache); |
332 | #endif |
333 | if (one_md_dst->type == METADATA_XFRM) |
334 | dst_release(one_md_dst->u.xfrm_info.dst_orig); |
335 | } |
336 | free_percpu(pdata: md_dst); |
337 | } |
338 | EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); |
339 | |