// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet fragments management
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly.c and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/rhashtable.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/ipv6.h>

#include "../core/sock_destructor.h"

/* Use skb->cb to track consecutive/adjacent fragments coming at
 * the end of the queue. Nodes in the rb-tree queue will
 * contain "runs" of one or more adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
 */
struct ipfrag_skb_cb {
        union {
                struct inet_skb_parm    h4;
                struct inet6_skb_parm   h6;
        };
        struct sk_buff          *next_frag;
        int                     frag_run_len;
        int                     ip_defrag_offset;
};

#define FRAG_CB(skb)            ((struct ipfrag_skb_cb *)((skb)->cb))

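/*
 * Illustration of the invariants above: three adjacent 8-byte fragments
 * queued in order collapse into a single rb-tree node (one "run"):
 *
 *      rb_fragments: [A: offset 0, frag_run_len 24]
 *                      A->next_frag = B (offset 8)
 *                      B->next_frag = C (offset 16)
 *                      C->next_frag = NULL
 *
 * A fragment that does not extend the tail run starts a new run, i.e. a
 * new rb-tree node keyed by its offset.
 */
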
static void fragcb_clear(struct sk_buff *skb)
{
        RB_CLEAR_NODE(&skb->rbnode);
        FRAG_CB(skb)->next_frag = NULL;
        FRAG_CB(skb)->frag_run_len = skb->len;
}

/* Append skb to the last "run". */
static void fragrun_append_to_last(struct inet_frag_queue *q,
                                   struct sk_buff *skb)
{
        fragcb_clear(skb);

        FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
        FRAG_CB(q->fragments_tail)->next_frag = skb;
        q->fragments_tail = skb;
}

/* Create a new "run" with the skb. */
static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
{
        BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
        fragcb_clear(skb);

        if (q->last_run_head)
                rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
                             &q->last_run_head->rbnode.rb_right);
        else
                rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
        rb_insert_color(&skb->rbnode, &q->rb_fragments);

        q->fragments_tail = skb;
        q->last_run_head = skb;
}

/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements.
 * Value: 0xff if the frame should be dropped.
 *        0 or INET_ECN_CE value, to be ORed into the final iph->tos field.
 */
const u8 ip_frag_ecn_table[16] = {
        /* at least one fragment had CE, and others ECT_0 or ECT_1 */
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]                      = INET_ECN_CE,
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]                      = INET_ECN_CE,
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]   = INET_ECN_CE,

        /* invalid combinations : drop frame */
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

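/*
 * Usage sketch (modeled on ip_frag_reasm() in net/ipv4/ip_fragment.c):
 * the queue keeps the OR of all per-fragment ECN bits, and reassembly
 * consults the table once at the end; 0xff means the datagram is dropped:
 *
 *      ecn = ip_frag_ecn_table[qp->ecn];
 *      if (unlikely(ecn == 0xff))
 *              goto out_fail;
 *      iph->tos |= ecn;
 */
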
int inet_frags_init(struct inet_frags *f)
{
        f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
                                            NULL);
        if (!f->frags_cachep)
                return -ENOMEM;

        refcount_set(&f->refcnt, 1);
        init_completion(&f->completion);
        return 0;
}
EXPORT_SYMBOL(inet_frags_init);

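/*
 * Registration sketch, modeled on ipfrag_init() in net/ipv4/ip_fragment.c:
 * a protocol fills in its struct inet_frags and calls inet_frags_init()
 * once at boot:
 *
 *      ip4_frags.constructor = ip4_frag_init;
 *      ip4_frags.destructor = ip4_frag_free;
 *      ip4_frags.qsize = sizeof(struct ipq);
 *      ip4_frags.frag_expire = ip_expire;
 *      ip4_frags.frags_cache_name = "ip4-frags";
 *      ip4_frags.rhash_params = ip4_rhash_params;
 *      if (inet_frags_init(&ip4_frags))
 *              panic("IP: failed to allocate ip4_frags cache\n");
 */
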
void inet_frags_fini(struct inet_frags *f)
{
        if (refcount_dec_and_test(&f->refcnt))
                complete(&f->completion);

        wait_for_completion(&f->completion);

        kmem_cache_destroy(f->frags_cachep);
        f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

/* called from rhashtable_free_and_destroy() at netns_frags dismantle */
static void inet_frags_free_cb(void *ptr, void *arg)
{
        struct inet_frag_queue *fq = ptr;
        int count;

        count = del_timer_sync(&fq->timer) ? 1 : 0;

        spin_lock_bh(&fq->lock);
        fq->flags |= INET_FRAG_DROP;
        if (!(fq->flags & INET_FRAG_COMPLETE)) {
                fq->flags |= INET_FRAG_COMPLETE;
                count++;
        } else if (fq->flags & INET_FRAG_HASH_DEAD) {
                count++;
        }
        spin_unlock_bh(&fq->lock);

        if (refcount_sub_and_test(count, &fq->refcnt))
                inet_frag_destroy(fq);
}

static LLIST_HEAD(fqdir_free_list);

static void fqdir_free_fn(struct work_struct *work)
{
        struct llist_node *kill_list;
        struct fqdir *fqdir, *tmp;
        struct inet_frags *f;

        /* Atomically snapshot the list of fqdirs to free */
        kill_list = llist_del_all(&fqdir_free_list);

        /* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
         * have completed, since they need to dereference fqdir.
         * Would it not be nice to have kfree_rcu_barrier() ? :)
         */
        rcu_barrier();

        llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) {
                f = fqdir->f;
                if (refcount_dec_and_test(&f->refcnt))
                        complete(&f->completion);

                kfree(fqdir);
        }
}

static DECLARE_WORK(fqdir_free_work, fqdir_free_fn);

static void fqdir_work_fn(struct work_struct *work)
{
        struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work);

        rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);

        if (llist_add(&fqdir->free_list, &fqdir_free_list))
                queue_work(system_wq, &fqdir_free_work);
}

int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
{
        struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
        int res;

        if (!fqdir)
                return -ENOMEM;
        fqdir->f = f;
        fqdir->net = net;
        res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
        if (res < 0) {
                kfree(fqdir);
                return res;
        }
        refcount_inc(&f->refcnt);
        *fqdirp = fqdir;
        return 0;
}
EXPORT_SYMBOL(fqdir_init);

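/*
 * Per-netns setup sketch, modeled on the ipv4 user (the limits shown are
 * the ipv4 defaults):
 *
 *      res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
 *      if (res < 0)
 *              return res;
 *      net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
 *      net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024;
 *      net->ipv4.fqdir->timeout = IP_FRAG_TIME;
 */
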
static struct workqueue_struct *inet_frag_wq;

static int __init inet_frag_wq_init(void)
{
        inet_frag_wq = create_workqueue("inet_frag_wq");
        if (!inet_frag_wq)
                panic("Could not create inet frag workq");
        return 0;
}

pure_initcall(inet_frag_wq_init);

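/*
 * fqdir teardown happens in two stages: fqdir_work_fn() runs on the
 * dedicated workqueue and evicts every pending queue from the rhashtable
 * via inet_frags_free_cb(); it then queues fqdir_free_fn() on the system
 * workqueue, which waits for in-flight inet_frag_destroy_rcu() callbacks
 * with rcu_barrier() before freeing the fqdir structs themselves.
 */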
void fqdir_exit(struct fqdir *fqdir)
{
        INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
        queue_work(inet_frag_wq, &fqdir->destroy_work);
}
EXPORT_SYMBOL(fqdir_exit);

void inet_frag_kill(struct inet_frag_queue *fq)
{
        if (del_timer(&fq->timer))
                refcount_dec(&fq->refcnt);

        if (!(fq->flags & INET_FRAG_COMPLETE)) {
                struct fqdir *fqdir = fq->fqdir;

                fq->flags |= INET_FRAG_COMPLETE;
                rcu_read_lock();
                /* The RCU read lock provides a memory barrier
                 * guaranteeing that if fqdir->dead is false then
                 * the hash table destruction will not start until
                 * after we unlock. Paired with fqdir_pre_exit().
                 */
                if (!READ_ONCE(fqdir->dead)) {
                        rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
                                               fqdir->f->rhash_params);
                        refcount_dec(&fq->refcnt);
                } else {
                        fq->flags |= INET_FRAG_HASH_DEAD;
                }
                rcu_read_unlock();
        }
}
EXPORT_SYMBOL(inet_frag_kill);

static void inet_frag_destroy_rcu(struct rcu_head *head)
{
        struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
                                                 rcu);
        struct inet_frags *f = q->fqdir->f;

        if (f->destructor)
                f->destructor(q);
        kmem_cache_free(f->frags_cachep, q);
}

unsigned int inet_frag_rbtree_purge(struct rb_root *root,
                                    enum skb_drop_reason reason)
{
        struct rb_node *p = rb_first(root);
        unsigned int sum = 0;

        while (p) {
                struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

                p = rb_next(p);
                rb_erase(&skb->rbnode, root);
                while (skb) {
                        struct sk_buff *next = FRAG_CB(skb)->next_frag;

                        sum += skb->truesize;
                        kfree_skb_reason(skb, reason);
                        skb = next;
                }
        }
        return sum;
}
EXPORT_SYMBOL(inet_frag_rbtree_purge);

void inet_frag_destroy(struct inet_frag_queue *q)
{
        unsigned int sum, sum_truesize = 0;
        enum skb_drop_reason reason;
        struct inet_frags *f;
        struct fqdir *fqdir;

        WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
        reason = (q->flags & INET_FRAG_DROP) ?
                        SKB_DROP_REASON_FRAG_REASM_TIMEOUT :
                        SKB_CONSUMED;
        WARN_ON(del_timer(&q->timer) != 0);

        /* Release all fragment data. */
        fqdir = q->fqdir;
        f = fqdir->f;
        sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason);
        sum = sum_truesize + f->qsize;

        call_rcu(&q->rcu, inet_frag_destroy_rcu);

        sub_frag_mem_limit(fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
                                               struct inet_frags *f,
                                               void *arg)
{
        struct inet_frag_queue *q;

        q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
        if (!q)
                return NULL;

        q->fqdir = fqdir;
        f->constructor(q, arg);
        add_frag_mem_limit(fqdir, f->qsize);

        timer_setup(&q->timer, f->frag_expire, 0);
        spin_lock_init(&q->lock);
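        /* One reference for the hash table, one for the expiration timer,
         * and one for the caller of inet_frag_find().
         */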
        refcount_set(&q->refcnt, 3);

        return q;
}

static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
                                                void *arg,
                                                struct inet_frag_queue **prev)
{
        struct inet_frags *f = fqdir->f;
        struct inet_frag_queue *q;

        q = inet_frag_alloc(fqdir, f, arg);
        if (!q) {
                *prev = ERR_PTR(-ENOMEM);
                return NULL;
        }
        mod_timer(&q->timer, jiffies + fqdir->timeout);

        *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
                                                 &q->node, f->rhash_params);
        if (*prev) {
                q->flags |= INET_FRAG_COMPLETE;
                inet_frag_kill(q);
                inet_frag_destroy(q);
                return NULL;
        }
        return q;
}

/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
        /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
        long high_thresh = READ_ONCE(fqdir->high_thresh);
        struct inet_frag_queue *fq = NULL, *prev;

        if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
                return NULL;

        rcu_read_lock();

        prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
        if (!prev)
                fq = inet_frag_create(fqdir, key, &prev);
        if (!IS_ERR_OR_NULL(prev)) {
                fq = prev;
                if (!refcount_inc_not_zero(&fq->refcnt))
                        fq = NULL;
        }
        rcu_read_unlock();
        return fq;
}
EXPORT_SYMBOL(inet_frag_find);

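/*
 * Lookup sketch, modeled on ip_find() in net/ipv4/ip_fragment.c: callers
 * embed an inet_frag_queue in their own queue struct and recover it with
 * container_of():
 *
 *      q = inet_frag_find(net->ipv4.fqdir, &key);
 *      if (!q)
 *              return NULL;
 *      return container_of(q, struct ipq, q);
 */
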
int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
                           int offset, int end)
{
        struct sk_buff *last = q->fragments_tail;

        /* RFC5722, Section 4, amended by Errata ID : 3089
         * When reassembling an IPv6 datagram, if
         * one or more of its constituent fragments is determined to be an
         * overlapping fragment, the entire datagram (and any constituent
         * fragments) MUST be silently discarded.
         *
         * Duplicates, however, should be ignored (i.e. the skb dropped, but
         * the queue/fragments kept for later reassembly).
         */
        if (!last)
                fragrun_create(q, skb);  /* First fragment. */
        else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
                /* This is the common case: skb goes to the end. */
                /* Detect and discard overlaps. */
                if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
                        return IPFRAG_OVERLAP;
                if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
                        fragrun_append_to_last(q, skb);
                else
                        fragrun_create(q, skb);
        } else {
                /* Binary search. Note that skb can become the first fragment,
                 * but not the last (covered above).
                 */
                struct rb_node **rbn, *parent;

                rbn = &q->rb_fragments.rb_node;
                do {
                        struct sk_buff *curr;
                        int curr_run_end;

                        parent = *rbn;
                        curr = rb_to_skb(parent);
                        curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
                                       FRAG_CB(curr)->frag_run_len;
                        if (end <= FRAG_CB(curr)->ip_defrag_offset)
                                rbn = &parent->rb_left;
                        else if (offset >= curr_run_end)
                                rbn = &parent->rb_right;
                        else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
                                 end <= curr_run_end)
                                return IPFRAG_DUP;
                        else
                                return IPFRAG_OVERLAP;
                } while (*rbn);
                /* Here we have parent properly set, and rbn pointing to
                 * one of its NULL left/right children. Insert skb.
                 */
                fragcb_clear(skb);
                rb_link_node(&skb->rbnode, parent, rbn);
                rb_insert_color(&skb->rbnode, &q->rb_fragments);
        }

        FRAG_CB(skb)->ip_defrag_offset = offset;

        return IPFRAG_OK;
}
EXPORT_SYMBOL(inet_frag_queue_insert);

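/*
 * Caller sketch (modeled on ip_frag_queue() in net/ipv4/ip_fragment.c):
 * IPFRAG_DUP drops only the new skb, while IPFRAG_OVERLAP invalidates the
 * whole queue:
 *
 *      err = inet_frag_queue_insert(&qp->q, skb, offset, end);
 *      if (err == IPFRAG_DUP) {
 *              kfree_skb(skb);
 *      } else if (err == IPFRAG_OVERLAP) {
 *              inet_frag_kill(&qp->q);
 *              kfree_skb(skb);
 *      }
 */
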
void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
                              struct sk_buff *parent)
{
        struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
        void (*destructor)(struct sk_buff *);
        unsigned int orig_truesize = 0;
        struct sk_buff **nextp = NULL;
        struct sock *sk = skb->sk;
        int delta;

        if (sk && is_skb_wmem(skb)) {
                /* TX: skb->sk might have been passed as argument to
                 * dst->output and must remain valid until tx completes.
                 *
                 * Move sk to reassembled skb and fix up wmem accounting.
                 */
                orig_truesize = skb->truesize;
                destructor = skb->destructor;
        }

        if (head != skb) {
                fp = skb_clone(skb, GFP_ATOMIC);
                if (!fp) {
                        head = skb;
                        goto out_restore_sk;
                }
                FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
                if (RB_EMPTY_NODE(&skb->rbnode))
                        FRAG_CB(parent)->next_frag = fp;
                else
                        rb_replace_node(&skb->rbnode, &fp->rbnode,
                                        &q->rb_fragments);
                if (q->fragments_tail == skb)
                        q->fragments_tail = fp;

                if (orig_truesize) {
                        /* prevent skb_morph from releasing sk */
                        skb->sk = NULL;
                        skb->destructor = NULL;
                }
                skb_morph(skb, head);
                FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
                rb_replace_node(&head->rbnode, &skb->rbnode,
                                &q->rb_fragments);
                consume_skb(head);
                head = skb;
        }
        WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);

        delta = -head->truesize;

        /* Head of list must not be cloned. */
        if (skb_unclone(head, GFP_ATOMIC))
                goto out_restore_sk;

        delta += head->truesize;
        if (delta)
                add_frag_mem_limit(q->fqdir, delta);

        /* If the first fragment is fragmented itself, we split
         * it to two chunks: the first with data and paged part
         * and the second, holding only fragments.
         */
        if (skb_has_frag_list(head)) {
                struct sk_buff *clone;
                int i, plen = 0;

                clone = alloc_skb(0, GFP_ATOMIC);
                if (!clone)
                        goto out_restore_sk;
                skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
                skb_frag_list_init(head);
                for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
                        plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
                clone->data_len = head->data_len - plen;
                clone->len = clone->data_len;
                head->truesize += clone->truesize;
                clone->csum = 0;
                clone->ip_summed = head->ip_summed;
                add_frag_mem_limit(q->fqdir, clone->truesize);
                skb_shinfo(head)->frag_list = clone;
                nextp = &clone->next;
        } else {
                nextp = &skb_shinfo(head)->frag_list;
        }

out_restore_sk:
        if (orig_truesize) {
                int ts_delta = head->truesize - orig_truesize;

                /* if this reassembled skb is fragmented later,
                 * fraglist skbs will get skb->sk assigned from head->sk,
                 * and each frag skb will be released via sock_wfree.
                 *
                 * Update sk_wmem_alloc.
                 */
                head->sk = sk;
                head->destructor = destructor;
                refcount_add(ts_delta, &sk->sk_wmem_alloc);
        }

        return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);

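/*
 * Reassembly flow sketch (modeled on ip_frag_reasm()): once all fragments
 * have arrived, prepare the head skb, fix up the protocol header, then
 * link the remaining fragments:
 *
 *      reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
 *      if (!reasm_data)
 *              goto out_nomem;
 *      (rebuild the network header in skb here)
 *      inet_frag_reasm_finish(&qp->q, skb, reasm_data, true);
 */
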
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
                            void *reasm_data, bool try_coalesce)
{
        struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
        const unsigned int head_truesize = head->truesize;
        struct sk_buff **nextp = reasm_data;
        struct rb_node *rbn;
        struct sk_buff *fp;
        int sum_truesize;

        skb_push(head, head->data - skb_network_header(head));

        /* Traverse the tree in order, to build frag_list. */
        fp = FRAG_CB(head)->next_frag;
        rbn = rb_next(&head->rbnode);
        rb_erase(&head->rbnode, &q->rb_fragments);

        sum_truesize = head->truesize;
        while (rbn || fp) {
                /* fp points to the next sk_buff in the current run;
                 * rbn points to the next run.
                 */
                /* Go through the current run. */
                while (fp) {
                        struct sk_buff *next_frag = FRAG_CB(fp)->next_frag;
                        bool stolen;
                        int delta;

                        sum_truesize += fp->truesize;
                        if (head->ip_summed != fp->ip_summed)
                                head->ip_summed = CHECKSUM_NONE;
                        else if (head->ip_summed == CHECKSUM_COMPLETE)
                                head->csum = csum_add(head->csum, fp->csum);

                        if (try_coalesce && skb_try_coalesce(head, fp, &stolen,
                                                             &delta)) {
                                kfree_skb_partial(fp, stolen);
                        } else {
                                fp->prev = NULL;
                                memset(&fp->rbnode, 0, sizeof(fp->rbnode));
                                fp->sk = NULL;

                                head->data_len += fp->len;
                                head->len += fp->len;
                                head->truesize += fp->truesize;

                                *nextp = fp;
                                nextp = &fp->next;
                        }

                        fp = next_frag;
                }
                /* Move to the next run. */
                if (rbn) {
                        struct rb_node *rbnext = rb_next(rbn);

                        fp = rb_to_skb(rbn);
                        rb_erase(rbn, &q->rb_fragments);
                        rbn = rbnext;
                }
        }
        sub_frag_mem_limit(q->fqdir, sum_truesize);

        *nextp = NULL;
        skb_mark_not_on_list(head);
        head->prev = NULL;
        head->tstamp = q->stamp;
        head->mono_delivery_time = q->mono_delivery_time;

        if (sk)
                refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);

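/* Detach the first fragment from the queue, e.g. so that ip_expire() can
 * hand it to icmp_send() when reporting ICMP_EXC_FRAGTIME on timeout.
 */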
struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
{
        struct sk_buff *head, *skb;

        head = skb_rb_first(&q->rb_fragments);
        if (!head)
                return NULL;
        skb = FRAG_CB(head)->next_frag;
        if (skb)
                rb_replace_node(&head->rbnode, &skb->rbnode,
                                &q->rb_fragments);
        else
                rb_erase(&head->rbnode, &q->rb_fragments);
        memset(&head->rbnode, 0, sizeof(head->rbnode));
        barrier();

        if (head == q->fragments_tail)
                q->fragments_tail = NULL;

        sub_frag_mem_limit(q->fqdir, head->truesize);

        return head;
}
EXPORT_SYMBOL(inet_frag_pull_head);
