// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP fragmentation functionality.
 *
 * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Alan Cox	: Split from ip.c, see ip_input.c for history.
 *		David S. Miller	: Begin massive cleanup...
 *		Andi Kleen	: Add sysctls.
 *		xxxx		: Overlapfrag bug.
 *		Ultima		: ip_expire() kernel panic.
 *		Bill Hawes	: Frag accounting and evictor fixes.
 *		John McDonald	: 0 length frag bug.
 *		Alexey Kuznetsov: SMP races, threading, cleanup.
 *		Patrick McHardy	: LRU queue of frag heads for evictor.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/route.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/inet_frag.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
#include <net/l3mdev.h>

/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
 * as well. Or notify me, at least. --ANK
 */
static const char ip_frag_cache_name[] = "ip4-frags";

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
	struct inet_frag_queue q;

	u8		ecn;		/* RFC3168 support */
	u16		max_df_size;	/* largest frag with DF set seen */
	int		iif;
	unsigned int	rid;
	struct inet_peer *peer;
};

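/* ECN handling across fragments: ip4_frag_ecn() maps the two RFC 3168 ECN
 * bits of the TOS field onto one of four mask bits, and ip_frag_queue() ORs
 * these masks into ipq->ecn.  At reassembly time the accumulated mask indexes
 * ip_frag_ecn_table to pick the codepoint for the rebuilt header, or to
 * reject an invalid combination of codepoints.
 */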
static u8 ip4_frag_ecn(u8 tos)
{
	return 1 << (tos & INET_ECN_MASK);
}

static struct inet_frags ip4_frags;

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
			 struct sk_buff *prev_tail, struct net_device *dev);


static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
	struct ipq *qp = container_of(q, struct ipq, q);
	struct net *net = q->fqdir->net;

	const struct frag_v4_compare_key *key = a;

	q->key.v4 = *key;
	qp->ecn = 0;
	qp->peer = q->fqdir->max_dist ?
		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
		NULL;
}

static void ip4_frag_free(struct inet_frag_queue *q)
{
	struct ipq *qp;

	qp = container_of(q, struct ipq, q);
	if (qp->peer)
		inet_putpeer(qp->peer);
}


/* Destruction primitives. */

static void ipq_put(struct ipq *ipq)
{
	inet_frag_put(&ipq->q);
}

/* Kill an ipq entry. It is not destroyed immediately, because the caller
 * (and possibly others) still holds a reference to it.
 */
static void ipq_kill(struct ipq *ipq)
{
	inet_frag_kill(&ipq->q);
}

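/* AF_PACKET, conntrack and bridge-netfilter defrag users may be reassembling
 * packets that are not addressed to this host, so ip_expire() sends the
 * RFC 792 "Fragment Reassembly Timeout" ICMP on their behalf only when the
 * destination turns out to be local.
 */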
static bool frag_expire_skip_icmp(u32 user)
{
	return user == IP_DEFRAG_AF_PACKET ||
	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
					 __IP_DEFRAG_CONNTRACK_IN_END) ||
	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
					 __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
}

/*
 * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
 */
static void ip_expire(struct timer_list *t)
{
	struct inet_frag_queue *frag = from_timer(frag, t, timer);
	const struct iphdr *iph;
	struct sk_buff *head = NULL;
	struct net *net;
	struct ipq *qp;
	int err;

	qp = container_of(frag, struct ipq, q);
	net = qp->q.fqdir->net;

	rcu_read_lock();

	/* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
	if (READ_ONCE(qp->q.fqdir->dead))
		goto out_rcu_unlock;

	spin_lock(&qp->q.lock);

	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto out;

	qp->q.flags |= INET_FRAG_DROP;
	ipq_kill(qp);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);

	if (!(qp->q.flags & INET_FRAG_FIRST_IN))
		goto out;

	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
	 * pull the head out of the tree in order to be able to
	 * deal with head->dev.
	 */
	head = inet_frag_pull_head(&qp->q);
	if (!head)
		goto out;
	head->dev = dev_get_by_index_rcu(net, qp->iif);
	if (!head->dev)
		goto out;


	/* skb has no dst, perform route lookup again */
	iph = ip_hdr(head);
	err = ip_route_input_noref(head, iph->daddr, iph->saddr,
				   iph->tos, head->dev);
	if (err)
		goto out;

	/* Only an end host needs to send an ICMP
	 * "Fragment Reassembly Timeout" message, per RFC792.
	 */
	if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
	    (skb_rtable(head)->rt_type != RTN_LOCAL))
		goto out;

	spin_unlock(&qp->q.lock);
	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
	goto out_rcu_unlock;

out:
	spin_unlock(&qp->q.lock);
out_rcu_unlock:
	rcu_read_unlock();
	kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
	ipq_put(qp);
}

/* Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create a new one if nothing is found.
 */
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
			   u32 user, int vif)
{
	struct frag_v4_compare_key key = {
		.saddr = iph->saddr,
		.daddr = iph->daddr,
		.user = user,
		.vif = vif,
		.id = iph->id,
		.protocol = iph->protocol,
	};
	struct inet_frag_queue *q;

	q = inet_frag_find(net->ipv4.fqdir, &key);
	if (!q)
		return NULL;

	return container_of(q, struct ipq, q);
}

/* Is the fragment too far ahead to be part of ipq? */
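/* The heuristic behind the ipfrag_max_dist sysctl: every fragment processed
 * for a given peer bumps peer->rid, and qp->rid remembers the value seen when
 * this queue last received a fragment.  If the queue already holds data and
 * more than max_dist fragments from the same peer have gone by since then,
 * the queue is treated as stale and its contents are discarded.
 */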
static int ip_frag_too_far(struct ipq *qp)
{
	struct inet_peer *peer = qp->peer;
	unsigned int max = qp->q.fqdir->max_dist;
	unsigned int start, end;

	int rc;

	if (!peer || !max)
		return 0;

	start = qp->rid;
	end = atomic_inc_return(&peer->rid);
	qp->rid = end;

	rc = qp->q.fragments_tail && (end - start) > max;

	if (rc)
		__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);

	return rc;
}

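/* Flush and reuse a queue that ip_frag_too_far() flagged: re-arm the timer,
 * drop all queued fragments (crediting their truesize back to the fqdir
 * memory accounting) and reset the queue state so the current fragment can
 * start a fresh reassembly.
 */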
static int ip_frag_reinit(struct ipq *qp)
{
	unsigned int sum_truesize = 0;

	if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
		refcount_inc(&qp->q.refcnt);
		return -ETIMEDOUT;
	}

	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments,
					      SKB_DROP_REASON_FRAG_TOO_FAR);
	sub_frag_mem_limit(qp->q.fqdir, sum_truesize);

	qp->q.flags = 0;
	qp->q.len = 0;
	qp->q.meat = 0;
	qp->q.rb_fragments = RB_ROOT;
	qp->q.fragments_tail = NULL;
	qp->q.last_run_head = NULL;
	qp->iif = 0;
	qp->ecn = 0;

	return 0;
}

/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
	struct net *net = qp->q.fqdir->net;
	int ihl, end, flags, offset;
	struct sk_buff *prev_tail;
	struct net_device *dev;
	unsigned int fragsize;
	int err = -ENOENT;
	SKB_DR(reason);
	u8 ecn;

	/* If reassembly is already done, @skb must be a duplicate frag. */
	if (qp->q.flags & INET_FRAG_COMPLETE) {
		SKB_DR_SET(reason, DUP_FRAG);
		goto err;
	}

	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
	    unlikely(ip_frag_too_far(qp)) &&
	    unlikely(err = ip_frag_reinit(qp))) {
		ipq_kill(qp);
		goto err;
	}

	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
	offset = ntohs(ip_hdr(skb)->frag_off);
	flags = offset & ~IP_OFFSET;
	offset &= IP_OFFSET;
	offset <<= 3;		/* offset is in 8-byte chunks */
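	/* frag_off packs three flag bits and a 13-bit offset counted in
	 * 8-byte units.  For example, a host-order value of 0x20b9 has IP_MF
	 * set and a payload offset of 0xb9 * 8 = 1480 bytes into the
	 * original datagram.
	 */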
	ihl = ip_hdrlen(skb);

	/* Determine the position of this fragment. */
	end = offset + skb->len - skb_network_offset(skb) - ihl;
	err = -EINVAL;

	/* Is this the final fragment? */
	if ((flags & IP_MF) == 0) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < qp->q.len ||
		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
			goto discard_qp;
		qp->q.flags |= INET_FRAG_LAST_IN;
		qp->q.len = end;
	} else {
		if (end & 7) {
			end &= ~7;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
		if (end > qp->q.len) {
			/* Some bits beyond end -> corruption. */
			if (qp->q.flags & INET_FRAG_LAST_IN)
				goto discard_qp;
			qp->q.len = end;
		}
	}
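	/* A fragment that contributes no data (end == offset, i.e. an empty
	 * payload or a non-final fragment shorter than 8 bytes) is invalid:
	 * drop it and kill the whole queue.
	 */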
	if (end == offset)
		goto discard_qp;

	err = -ENOMEM;
	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
		goto discard_qp;

	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
		goto discard_qp;

	/* Note : skb->rbnode and skb->dev share the same location. */
	dev = skb->dev;
	/* Makes sure compiler won't do silly aliasing games */
	barrier();

	prev_tail = qp->q.fragments_tail;
	err = inet_frag_queue_insert(&qp->q, skb, offset, end);
	if (err)
		goto insert_error;

	if (dev)
		qp->iif = dev->ifindex;

	qp->q.stamp = skb->tstamp;
	qp->q.mono_delivery_time = skb->mono_delivery_time;
	qp->q.meat += skb->len;
	qp->ecn |= ecn;
	add_frag_mem_limit(qp->q.fqdir, skb->truesize);
	if (offset == 0)
		qp->q.flags |= INET_FRAG_FIRST_IN;

	fragsize = skb->len + ihl;

	if (fragsize > qp->q.max_size)
		qp->q.max_size = fragsize;

	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
	    fragsize > qp->max_df_size)
		qp->max_df_size = fragsize;

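	/* Reassembly is triggered once both the first and the last fragment
	 * have been seen and the payload queued so far (q.meat) adds up to
	 * the expected datagram length (q.len).
	 */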
	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

		skb->_skb_refdst = 0UL;
		err = ip_frag_reasm(qp, skb, prev_tail, dev);
		skb->_skb_refdst = orefdst;
		if (err)
			inet_frag_kill(&qp->q);
		return err;
	}

	skb_dst_drop(skb);
	skb_orphan(skb);
	return -EINPROGRESS;

insert_error:
	if (err == IPFRAG_DUP) {
		SKB_DR_SET(reason, DUP_FRAG);
		err = -EINVAL;
		goto err;
	}
	err = -EINVAL;
	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
discard_qp:
	inet_frag_kill(&qp->q);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
err:
	kfree_skb_reason(skb, reason);
	return err;
}

static bool ip_frag_coalesce_ok(const struct ipq *qp)
{
	return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
}

/* Build a new IP datagram from all its fragments. */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
			 struct sk_buff *prev_tail, struct net_device *dev)
{
	struct net *net = qp->q.fqdir->net;
	struct iphdr *iph;
	void *reasm_data;
	int len, err;
	u8 ecn;

	ipq_kill(qp);

	ecn = ip_frag_ecn_table[qp->ecn];
	if (unlikely(ecn == 0xff)) {
		err = -EINVAL;
		goto out_fail;
	}

	/* Make the one we just received the head. */
	reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
	if (!reasm_data)
		goto out_nomem;

	len = ip_hdrlen(skb) + qp->q.len;
	err = -E2BIG;
	if (len > 65535)
		goto out_oversize;

	inet_frag_reasm_finish(&qp->q, skb, reasm_data,
			       ip_frag_coalesce_ok(qp));

	skb->dev = dev;
	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);

	iph = ip_hdr(skb);
	iph->tot_len = htons(len);
	iph->tos |= ecn;

	/* When we set IP_DF on a refragmented skb we must also force a
	 * call to ip_fragment to avoid forwarding a DF-skb of size s while
	 * the original sender only sent fragments of size f (where f < s).
	 *
	 * We only set DF/IPSKB_FRAG_PMTU if such a DF fragment was the
	 * largest frag seen, to avoid sending tiny DF-fragments in case the
	 * skb was built from one very small DF fragment and one large
	 * non-DF fragment.
	 */
	if (qp->max_df_size == qp->q.max_size) {
		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
		iph->frag_off = htons(IP_DF);
	} else {
		iph->frag_off = 0;
	}

	ip_send_check(iph);

	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
	qp->q.rb_fragments = RB_ROOT;
	qp->q.fragments_tail = NULL;
	qp->q.last_run_head = NULL;
	return 0;

out_nomem:
	net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
	err = -ENOMEM;
	goto out_fail;
out_oversize:
	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	return err;
}

/* Process an incoming IP datagram fragment. */
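/* Returns 0 when the datagram has been fully reassembled (skb then holds the
 * complete packet), -EINPROGRESS when the fragment was queued and skb is now
 * owned by the reassembly queue, or another negative error after skb has
 * been freed.  Callers may only keep processing skb on a return of 0.
 */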
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
	int vif = l3mdev_master_ifindex_rcu(dev);
	struct ipq *qp;

	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);

	/* Lookup (or create) queue header */
	qp = ip_find(net, ip_hdr(skb), user, vif);
	if (qp) {
		int ret;

		spin_lock(&qp->q.lock);

		ret = ip_frag_queue(qp, skb);

		spin_unlock(&qp->q.lock);
		ipq_put(qp);
		return ret;
	}

	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -ENOMEM;
}
EXPORT_SYMBOL(ip_defrag);

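/* Variant used by callers that see raw frames outside the normal IP input
 * path (AF_PACKET fanout, for example).  Packets that are not IPv4 fragments,
 * or that cannot be parsed, are returned unchanged.  A fragment is un-shared
 * if necessary and fed to ip_defrag(): the function then returns NULL until
 * reassembly completes, at which point the rebuilt datagram is returned.
 */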
struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	struct iphdr iph;
	int netoff;
	u32 len;

	if (skb->protocol != htons(ETH_P_IP))
		return skb;

	netoff = skb_network_offset(skb);

	if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
		return skb;

	if (iph.ihl < 5 || iph.version != 4)
		return skb;

	len = ntohs(iph.tot_len);
	if (skb->len < netoff + len || len < (iph.ihl * 4))
		return skb;

	if (ip_is_fragment(&iph)) {
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (skb) {
			if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
				kfree_skb(skb);
				return NULL;
			}
			if (pskb_trim_rcsum(skb, netoff + len)) {
				kfree_skb(skb);
				return NULL;
			}
			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
			if (ip_defrag(net, skb, user))
				return NULL;
			skb_clear_hash(skb);
		}
	}
	return skb;
}
EXPORT_SYMBOL(ip_check_defrag);

#ifdef CONFIG_SYSCTL
static int dist_min;

static struct ctl_table ip4_frags_ns_ctl_table[] = {
	{
		.procname	= "ipfrag_high_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ipfrag_low_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ipfrag_time",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "ipfrag_max_dist",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &dist_min,
	},
	{ }
};

/* secret interval has been deprecated */
static int ip4_frags_secret_interval_unused;
static struct ctl_table ip4_frags_ctl_table[] = {
	{
		.procname	= "ipfrag_secret_interval",
		.data		= &ip4_frags_secret_interval_unused,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};

static int __net_init ip4_frags_ns_ctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = ip4_frags_ns_ctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;

	}
	table[0].data = &net->ipv4.fqdir->high_thresh;
	table[0].extra1 = &net->ipv4.fqdir->low_thresh;
	table[1].data = &net->ipv4.fqdir->low_thresh;
	table[1].extra2 = &net->ipv4.fqdir->high_thresh;
	table[2].data = &net->ipv4.fqdir->timeout;
	table[3].data = &net->ipv4.fqdir->max_dist;

	hdr = register_net_sysctl_sz(net, "net/ipv4", table,
				     ARRAY_SIZE(ip4_frags_ns_ctl_table));
	if (!hdr)
		goto err_reg;

	net->ipv4.frags_hdr = hdr;
	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
	struct ctl_table *table;

	table = net->ipv4.frags_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.frags_hdr);
	kfree(table);
}

static void __init ip4_frags_ctl_register(void)
{
	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
#else
static int ip4_frags_ns_ctl_register(struct net *net)
{
	return 0;
}

static void ip4_frags_ns_ctl_unregister(struct net *net)
{
}

static void __init ip4_frags_ctl_register(void)
{
}
#endif

static int __net_init ipv4_frags_init_net(struct net *net)
{
	int res;

	res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
	if (res < 0)
		return res;
	/* Fragment cache limits.
	 *
	 * The fragment memory accounting code tries to account for the real
	 * memory usage, by measuring both the size of the frag queue struct
	 * (inet_frag_queue (ipv4:ipq/ipv6:frag_queue)) and the SKB's
	 * truesize.
	 *
	 * A 64K fragment consumes 129736 bytes (44*2944)+200
	 * (1500 truesize == 2944, sizeof(struct ipq) == 200)
	 *
	 * We will commit 4MB at one time. Should we cross that limit
	 * we will prune down to 3MB, making room for approx 8 big 64K
	 * fragments 8x128k.
	 */
	net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
	net->ipv4.fqdir->low_thresh  = 3 * 1024 * 1024;
	/*
	 * Important NOTE! The fragment queue must be destroyed before the MSL
	 * expires. RFC 791 is wrong in proposing to prolong the timer on each
	 * fragment arrival by the TTL.
	 */
	net->ipv4.fqdir->timeout = IP_FRAG_TIME;

	net->ipv4.fqdir->max_dist = 64;

	res = ip4_frags_ns_ctl_register(net);
	if (res < 0)
		fqdir_exit(net->ipv4.fqdir);
	return res;
}

static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
{
	fqdir_pre_exit(net->ipv4.fqdir);
}

static void __net_exit ipv4_frags_exit_net(struct net *net)
{
	ip4_frags_ns_ctl_unregister(net);
	fqdir_exit(net->ipv4.fqdir);
}

static struct pernet_operations ip4_frags_ops = {
	.init		= ipv4_frags_init_net,
	.pre_exit	= ipv4_frags_pre_exit_net,
	.exit		= ipv4_frags_exit_net,
};


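/* rhashtable glue: struct frag_v4_compare_key (saddr, daddr, id, protocol,
 * user, vif) is hashed with jhash2() as an array of u32 words, so its size
 * must remain a multiple of 4 bytes.  ip4_key_hashfn() hashes a bare lookup
 * key, ip4_obj_hashfn() hashes the key embedded in a queued entry, and
 * ip4_obj_cmpfn() compares the two.
 */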
static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
{
	return jhash2(data,
		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
}

static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct inet_frag_queue *fq = data;

	return jhash2((const u32 *)&fq->key.v4,
		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
}

static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct frag_v4_compare_key *key = arg->key;
	const struct inet_frag_queue *fq = ptr;

	return !!memcmp(&fq->key, key, sizeof(*key));
}

static const struct rhashtable_params ip4_rhash_params = {
	.head_offset		= offsetof(struct inet_frag_queue, node),
	.key_offset		= offsetof(struct inet_frag_queue, key),
	.key_len		= sizeof(struct frag_v4_compare_key),
	.hashfn			= ip4_key_hashfn,
	.obj_hashfn		= ip4_obj_hashfn,
	.obj_cmpfn		= ip4_obj_cmpfn,
	.automatic_shrinking	= true,
};

void __init ipfrag_init(void)
{
	ip4_frags.constructor = ip4_frag_init;
	ip4_frags.destructor = ip4_frag_free;
	ip4_frags.qsize = sizeof(struct ipq);
	ip4_frags.frag_expire = ip_expire;
	ip4_frags.frags_cache_name = ip_frag_cache_name;
	ip4_frags.rhash_params = ip4_rhash_params;
	if (inet_frags_init(&ip4_frags))
		panic("IP: failed to allocate ip4_frags cache\n");
	ip4_frags_ctl_register();
	register_pernet_subsys(&ip4_frags_ops);
}