skbuff.c source code [linux/net/core/skbuff.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Routines having to do with the 'struct sk_buff' memory handlers.
4	*
5	* Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
6	* Florian La Roche <rzsfl@rz.uni-sb.de>
7	*
8	* Fixes:
9	* Alan Cox : Fixed the worst of the load
10	* balancer bugs.
11	* Dave Platt : Interrupt stacking fix.
12	* Richard Kooijman : Timestamp fixes.
13	* Alan Cox : Changed buffer format.
14	* Alan Cox : destructor hook for AF_UNIX etc.
15	* Linus Torvalds : Better skb_clone.
16	* Alan Cox : Added skb_copy.
17	* Alan Cox : Added all the changed routines Linus
18	* only put in the headers
19	* Ray VanTassle : Fixed --skb->lock in free
20	* Alan Cox : skb_copy copy arp field
21	* Andi Kleen : slabified it.
22	* Robert Olsson : Removed skb_head_pool
23	*
24	* NOTE:
25	* The __skb_ routines should be called with interrupts
26	* disabled, or you better be real sure that the operation is atomic
27	* with respect to whatever list is being frobbed (e.g. via lock_sock()
28	* or via disabling bottom half handlers, etc).
29	*/
30
31	/*
32	* The functions in this file will not compile correctly with gcc 2.4.x
33	*/
34
35	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
36
37	#include <linux/module.h>
38	#include <linux/types.h>
39	#include <linux/kernel.h>
40	#include <linux/mm.h>
41	#include <linux/interrupt.h>
42	#include <linux/in.h>
43	#include <linux/inet.h>
44	#include <linux/slab.h>
45	#include <linux/tcp.h>
46	#include <linux/udp.h>
47	#include <linux/sctp.h>
48	#include <linux/netdevice.h>
49	#ifdef CONFIG_NET_CLS_ACT
50	#include <net/pkt_sched.h>
51	#endif
52	#include <linux/string.h>
53	#include <linux/skbuff.h>
54	#include <linux/skbuff_ref.h>
55	#include <linux/splice.h>
56	#include <linux/cache.h>
57	#include <linux/rtnetlink.h>
58	#include <linux/init.h>
59	#include <linux/scatterlist.h>
60	#include <linux/errqueue.h>
61	#include <linux/prefetch.h>
62	#include <linux/bitfield.h>
63	#include <linux/if_vlan.h>
64	#include <linux/mpls.h>
65	#include <linux/kcov.h>
66	#include <linux/iov_iter.h>
67	#include <linux/crc32.h>
68
69	#include <net/protocol.h>
70	#include <net/dst.h>
71	#include <net/sock.h>
72	#include <net/checksum.h>
73	#include <net/gro.h>
74	#include <net/gso.h>
75	#include <net/hotdata.h>
76	#include <net/ip6_checksum.h>
77	#include <net/xfrm.h>
78	#include <net/mpls.h>
79	#include <net/mptcp.h>
80	#include <net/mctp.h>
81	#include <net/page_pool/helpers.h>
82	#include <net/dropreason.h>
83
84	#include <linux/uaccess.h>
85	#include <trace/events/skb.h>
86	#include <linux/highmem.h>
87	#include <linux/capability.h>
88	#include <linux/user_namespace.h>
89	#include <linux/indirect_call_wrapper.h>
90	#include <linux/textsearch.h>
91
92	#include "dev.h"
93	#include "devmem.h"
94	#include "netmem_priv.h"
95	#include "sock_destructor.h"
96
97	#ifdef CONFIG_SKB_EXTENSIONS
98	static struct kmem_cache *skbuff_ext_cache __ro_after_init;
99	#endif
100
101	#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
102	#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
103	GRO_MAX_HEAD_PAD))
104
105	/ We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.*
106	* This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
107	* size, and we can differentiate heads from skb_small_head_cache
108	* vs system slabs by looking at their size (skb_end_offset()).
109	*/
110	#define SKB_SMALL_HEAD_CACHE_SIZE \
111	(is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \
112	(SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \
113	SKB_SMALL_HEAD_SIZE)
114
115	#define SKB_SMALL_HEAD_HEADROOM \
116	SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
117
118	/ kcm_write_msgs() relies on casting paged frags to bio_vec to use*
119	* iov_iter_bvec(). These static asserts ensure the cast is valid is long as the
120	* netmem is a page.
121	*/
122	static_assert(offsetof(struct bio_vec, bv_page) ==
123	offsetof(skb_frag_t, netmem));
124	static_assert(sizeof_field(struct bio_vec, bv_page) ==
125	sizeof_field(skb_frag_t, netmem));
126
127	static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len));
128	static_assert(sizeof_field(struct bio_vec, bv_len) ==
129	sizeof_field(skb_frag_t, len));
130
131	static_assert(offsetof(struct bio_vec, bv_offset) ==
132	offsetof(skb_frag_t, offset));
133	static_assert(sizeof_field(struct bio_vec, bv_offset) ==
134	sizeof_field(skb_frag_t, offset));
135
136	#undef FN
137	#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
138	static const char * const drop_reasons[] = {
139	[SKB_CONSUMED] = "CONSUMED",
140	DEFINE_DROP_REASON(FN, FN)
141	};
142
143	static const struct drop_reason_list drop_reasons_core = {
144	.reasons = drop_reasons,
145	.n_reasons = ARRAY_SIZE(drop_reasons),
146	};
147
148	const struct drop_reason_list __rcu *
149	drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = {
150	[SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core),
151	};
152	EXPORT_SYMBOL(drop_reasons_by_subsys);
153
154	/**
155	* drop_reasons_register_subsys - register another drop reason subsystem
156	* @subsys: the subsystem to register, must not be the core
157	* @list: the list of drop reasons within the subsystem, must point to
158	* a statically initialized list
159	*/
160	void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
161	const struct drop_reason_list *list)
162	{
163	if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE \|\|
164	subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
165	"invalid subsystem %d\n", subsys))
166	return;
167
168	/ must point to statically allocated memory, so INIT is OK /
169	RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list);
170	}
171	EXPORT_SYMBOL_GPL(drop_reasons_register_subsys);
172
173	/**
174	* drop_reasons_unregister_subsys - unregister a drop reason subsystem
175	* @subsys: the subsystem to remove, must not be the core
176	*
177	* Note: This will synchronize_rcu() to ensure no users when it returns.
178	*/
179	void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys)
180	{
181	if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE \|\|
182	subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
183	"invalid subsystem %d\n", subsys))
184	return;
185
186	RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL);
187
188	synchronize_rcu();
189	}
190	EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys);
191
192	/**
193	* skb_panic - private function for out-of-line support
194	* @skb: buffer
195	* @sz: size
196	* @addr: address
197	* @msg: skb_over_panic or skb_under_panic
198	*
199	* Out-of-line support for skb_put() and skb_push().
200	* Called via the wrapper skb_over_panic() or skb_under_panic().
201	* Keep out of line to prevent kernel bloat.
202	* __builtin_return_address is not used because it is not always reliable.
203	*/
204	static void skb_panic(struct sk_buff skb, unsigned* int sz, void *addr,
205	const char msg[])
206	{
207	pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
208	msg, addr, skb->len, sz, skb->head, skb->data,
209	(unsigned long)skb->tail, (unsigned long)skb->end,
210	skb->dev ? skb->dev->name : "<NULL>");
211	BUG();
212	}
213
214	static void skb_over_panic(struct sk_buff skb, unsigned* int sz, void *addr)
215	{
216	skb_panic(skb, sz, addr, msg: __func__);
217	}
218
219	static void skb_under_panic(struct sk_buff skb, unsigned* int sz, void *addr)
220	{
221	skb_panic(skb, sz, addr, msg: __func__);
222	}
223
224	#define NAPI_SKB_CACHE_SIZE 64
225	#define NAPI_SKB_CACHE_BULK 16
226	#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
227
228	struct napi_alloc_cache {
229	local_lock_t bh_lock;
230	struct page_frag_cache page;
231	unsigned int skb_count;
232	void *skb_cache[NAPI_SKB_CACHE_SIZE];
233	};
234
235	static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
236	static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
237	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
238	};
239
240	void __napi_alloc_frag_align(unsigned* int fragsz, unsigned int align_mask)
241	{
242	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
243	void *data;
244
245	fragsz = SKB_DATA_ALIGN(fragsz);
246
247	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
248	data = __page_frag_alloc_align(nc: &nc->page, fragsz,
249	GFP_ATOMIC \| __GFP_NOWARN, align_mask);
250	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
251	return data;
252
253	}
254	EXPORT_SYMBOL(__napi_alloc_frag_align);
255
256	void __netdev_alloc_frag_align(unsigned* int fragsz, unsigned int align_mask)
257	{
258	void *data;
259
260	if (in_hardirq() \|\| irqs_disabled()) {
261	struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
262
263	fragsz = SKB_DATA_ALIGN(fragsz);
264	data = __page_frag_alloc_align(nc, fragsz,
265	GFP_ATOMIC \| __GFP_NOWARN,
266	align_mask);
267	} else {
268	local_bh_disable();
269	data = __napi_alloc_frag_align(fragsz, align_mask);
270	local_bh_enable();
271	}
272	return data;
273	}
274	EXPORT_SYMBOL(__netdev_alloc_frag_align);
275
276	static struct sk_buff napi_skb_cache_get(void*)
277	{
278	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
279	struct sk_buff *skb;
280
281	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
282	if (unlikely(!nc->skb_count)) {
283	nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
284	GFP_ATOMIC \| __GFP_NOWARN,
285	NAPI_SKB_CACHE_BULK,
286	nc->skb_cache);
287	if (unlikely(!nc->skb_count)) {
288	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
289	return NULL;
290	}
291	}
292
293	skb = nc->skb_cache[--nc->skb_count];
294	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
295	kasan_mempool_unpoison_object(ptr: skb, size: kmem_cache_size(s: net_hotdata.skbuff_cache));
296
297	return skb;
298	}
299
300	/**
301	* napi_skb_cache_get_bulk - obtain a number of zeroed skb heads from the cache
302	* @skbs: pointer to an at least @n-sized array to fill with skb pointers
303	* @n: number of entries to provide
304	*
305	* Tries to obtain @n &sk_buff entries from the NAPI percpu cache and writes
306	* the pointers into the provided array @skbs. If there are less entries
307	* available, tries to replenish the cache and bulk-allocates the diff from
308	* the MM layer if needed.
309	* The heads are being zeroed with either memset() or %__GFP_ZERO, so they are
310	* ready for {,__}build_skb_around() and don't have any data buffers attached.
311	* Must be called only from the BH context.
312	*
313	* Return: number of successfully allocated skbs (@n if no actual allocation
314	* needed or kmem_cache_alloc_bulk() didn't fail).
315	*/
316	u32 napi_skb_cache_get_bulk(void **skbs, u32 n)
317	{
318	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
319	u32 bulk, total = n;
320
321	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
322
323	if (nc->skb_count >= n)
324	goto get;
325
326	/ No enough cached skbs. Try refilling the cache first /
327	bulk = min(NAPI_SKB_CACHE_SIZE - nc->skb_count, NAPI_SKB_CACHE_BULK);
328	nc->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
329	GFP_ATOMIC \| __GFP_NOWARN, bulk,
330	&nc->skb_cache[nc->skb_count]);
331	if (likely(nc->skb_count >= n))
332	goto get;
333
334	/ Still not enough. Bulk-allocate the missing part directly, zeroed /
335	n -= kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
336	GFP_ATOMIC \| __GFP_ZERO \| __GFP_NOWARN,
337	n - nc->skb_count, &skbs[nc->skb_count]);
338	if (likely(nc->skb_count >= n))
339	goto get;
340
341	/ kmem_cache didn't allocate the number we need, limit the output /
342	total -= n - nc->skb_count;
343	n = nc->skb_count;
344
345	get:
346	for (u32 base = nc->skb_count - n, i = `0`; i < n; i++) {
347	u32 cache_size = kmem_cache_size(s: net_hotdata.skbuff_cache);
348
349	skbs[i] = nc->skb_cache[base + i];
350
351	kasan_mempool_unpoison_object(ptr: skbs[i], size: cache_size);
352	memset(skbs[i], `0`, offsetof(struct sk_buff, tail));
353	}
354
355	nc->skb_count -= n;
356	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
357
358	return total;
359	}
360	EXPORT_SYMBOL_GPL(napi_skb_cache_get_bulk);
361
362	static inline void __finalize_skb_around(struct sk_buff skb, void* *data,
363	unsigned int size)
364	{
365	struct skb_shared_info *shinfo;
366
367	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
368
369	/ Assumes caller memset cleared SKB /
370	skb->truesize = SKB_TRUESIZE(size);
371	refcount_set(r: &skb->users, n: `1`);
372	skb->head = data;
373	skb->data = data;
374	skb_reset_tail_pointer(skb);
375	skb_set_end_offset(skb, offset: size);
376	skb->mac_header = (typeof(skb->mac_header))~`0U`;
377	skb->transport_header = (typeof(skb->transport_header))~`0U`;
378	skb->alloc_cpu = raw_smp_processor_id();
379	/ make sure we initialize shinfo sequentially /
380	shinfo = skb_shinfo(skb);
381	memset(shinfo, `0`, offsetof(struct skb_shared_info, dataref));
382	atomic_set(v: &shinfo->dataref, i: `1`);
383
384	skb_set_kcov_handle(skb, kcov_handle: kcov_common_handle());
385	}
386
387	static inline void __slab_build_skb(struct* sk_buff skb, void* *data,
388	unsigned int *size)
389	{
390	void *resized;
391
392	/ Must find the allocation size (and grow it to match). /
393	*size = ksize(objp: data);
394	/ krealloc() will immediately return "data" when*
395	* "ksize(data)" is requested: it is the existing upper
396	* bounds. As a result, GFP_ATOMIC will be ignored. Note
397	* that this "new" pointer needs to be passed back to the
398	* caller for use so the __alloc_size hinting will be
399	* tracked correctly.
400	*/
401	resized = krealloc(data, *size, GFP_ATOMIC);
402	WARN_ON_ONCE(resized != data);
403	return resized;
404	}
405
406	/ build_skb() variant which can operate on slab buffers.*
407	* Note that this should be used sparingly as slab buffers
408	* cannot be combined efficiently by GRO!
409	*/
410	struct sk_buff slab_build_skb(void* *data)
411	{
412	struct sk_buff *skb;
413	unsigned int size;
414
415	skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
416	GFP_ATOMIC \| __GFP_NOWARN);
417	if (unlikely(!skb))
418	return NULL;
419
420	memset(skb, `0`, offsetof(struct sk_buff, tail));
421	data = __slab_build_skb(skb, data, size: &size);
422	__finalize_skb_around(skb, data, size);
423
424	return skb;
425	}
426	EXPORT_SYMBOL(slab_build_skb);
427
428	/ Caller must provide SKB that is memset cleared /
429	static void __build_skb_around(struct sk_buff skb, void* *data,
430	unsigned int frag_size)
431	{
432	unsigned int size = frag_size;
433
434	/ frag_size == 0 is considered deprecated now. Callers*
435	* using slab buffer should use slab_build_skb() instead.
436	*/
437	if (WARN_ONCE(size == `0`, "Use slab_build_skb() instead"))
438	data = __slab_build_skb(skb, data, size: &size);
439
440	__finalize_skb_around(skb, data, size);
441	}
442
443	/**
444	* __build_skb - build a network buffer
445	* @data: data buffer provided by caller
446	* @frag_size: size of data (must not be 0)
447	*
448	* Allocate a new &sk_buff. Caller provides space holding head and
449	* skb_shared_info. @data must have been allocated from the page
450	* allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc()
451	* allocation is deprecated, and callers should use slab_build_skb()
452	* instead.)
453	* The return is the new skb buffer.
454	* On a failure the return is %NULL, and @data is not freed.
455	* Notes :
456	* Before IO, driver allocates only data buffer where NIC put incoming frame
457	* Driver should add room at head (NET_SKB_PAD) and
458	* MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
459	* After IO, driver calls build_skb(), to allocate sk_buff and populate it
460	* before giving packet to stack.
461	* RX rings only contains data buffers, not full skbs.
462	*/
463	struct sk_buff __build_skb(void* data, unsigned* int frag_size)
464	{
465	struct sk_buff *skb;
466
467	skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
468	GFP_ATOMIC \| __GFP_NOWARN);
469	if (unlikely(!skb))
470	return NULL;
471
472	memset(skb, `0`, offsetof(struct sk_buff, tail));
473	__build_skb_around(skb, data, frag_size);
474
475	return skb;
476	}
477
478	/ build_skb() is wrapper over __build_skb(), that specifically*
479	* takes care of skb->head and skb->pfmemalloc
480	*/
481	struct sk_buff build_skb(void* data, unsigned* int frag_size)
482	{
483	struct sk_buff *skb = __build_skb(data, frag_size);
484
485	if (likely(skb && frag_size)) {
486	skb->head_frag = `1`;
487	skb_propagate_pfmemalloc(page: virt_to_head_page(x: data), skb);
488	}
489	return skb;
490	}
491	EXPORT_SYMBOL(build_skb);
492
493	/**
494	* build_skb_around - build a network buffer around provided skb
495	* @skb: sk_buff provide by caller, must be memset cleared
496	* @data: data buffer provided by caller
497	* @frag_size: size of data
498	*/
499	struct sk_buff build_skb_around(struct* sk_buff *skb,
500	void data, unsigned* int frag_size)
501	{
502	if (unlikely(!skb))
503	return NULL;
504
505	__build_skb_around(skb, data, frag_size);
506
507	if (frag_size) {
508	skb->head_frag = `1`;
509	skb_propagate_pfmemalloc(page: virt_to_head_page(x: data), skb);
510	}
511	return skb;
512	}
513	EXPORT_SYMBOL(build_skb_around);
514
515	/**
516	* __napi_build_skb - build a network buffer
517	* @data: data buffer provided by caller
518	* @frag_size: size of data
519	*
520	* Version of __build_skb() that uses NAPI percpu caches to obtain
521	* skbuff_head instead of inplace allocation.
522	*
523	* Returns a new &sk_buff on success, %NULL on allocation failure.
524	*/
525	static struct sk_buff __napi_build_skb(void* data, unsigned* int frag_size)
526	{
527	struct sk_buff *skb;
528
529	skb = napi_skb_cache_get();
530	if (unlikely(!skb))
531	return NULL;
532
533	memset(skb, `0`, offsetof(struct sk_buff, tail));
534	__build_skb_around(skb, data, frag_size);
535
536	return skb;
537	}
538
539	/**
540	* napi_build_skb - build a network buffer
541	* @data: data buffer provided by caller
542	* @frag_size: size of data
543	*
544	* Version of __napi_build_skb() that takes care of skb->head_frag
545	* and skb->pfmemalloc when the data is a page or page fragment.
546	*
547	* Returns a new &sk_buff on success, %NULL on allocation failure.
548	*/
549	struct sk_buff napi_build_skb(void* data, unsigned* int frag_size)
550	{
551	struct sk_buff *skb = __napi_build_skb(data, frag_size);
552
553	if (likely(skb) && frag_size) {
554	skb->head_frag = `1`;
555	skb_propagate_pfmemalloc(page: virt_to_head_page(x: data), skb);
556	}
557
558	return skb;
559	}
560	EXPORT_SYMBOL(napi_build_skb);
561
562	/*
563	* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
564	* the caller if emergency pfmemalloc reserves are being used. If it is and
565	* the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
566	* may be used. Otherwise, the packet data may be discarded until enough
567	* memory is free
568	*/
569	static void kmalloc_reserve(unsigned* int size, gfp_t flags, int* node,
570	bool *pfmemalloc)
571	{
572	bool ret_pfmemalloc = false;
573	size_t obj_size;
574	void *obj;
575
576	obj_size = SKB_HEAD_ALIGN(*size);
577	if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
578	!(flags & KMALLOC_NOT_NORMAL_BITS)) {
579	obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
580	flags \| __GFP_NOMEMALLOC \| __GFP_NOWARN,
581	node);
582	*size = SKB_SMALL_HEAD_CACHE_SIZE;
583	if (obj \|\| !(gfp_pfmemalloc_allowed(gfp_mask: flags)))
584	goto out;
585	/ Try again but now we are using pfmemalloc reserves /
586	ret_pfmemalloc = true;
587	obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node);
588	goto out;
589	}
590
591	obj_size = kmalloc_size_roundup(size: obj_size);
592	/ The following cast might truncate high-order bits of obj_size, this*
593	* is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
594	*/
595	size = (unsigned* int)obj_size;
596
597	/*
598	* Try a regular allocation, when that fails and we're not entitled
599	* to the reserves, fail.
600	*/
601	obj = kmalloc_node_track_caller(obj_size,
602	flags \| __GFP_NOMEMALLOC \| __GFP_NOWARN,
603	node);
604	if (obj \|\| !(gfp_pfmemalloc_allowed(gfp_mask: flags)))
605	goto out;
606
607	/ Try again but now we are using pfmemalloc reserves /
608	ret_pfmemalloc = true;
609	obj = kmalloc_node_track_caller(obj_size, flags, node);
610
611	out:
612	if (pfmemalloc)
613	*pfmemalloc = ret_pfmemalloc;
614
615	return obj;
616	}
617
618	/ Allocate a new skbuff. We do this ourselves so we can fill in a few*
619	* 'private' fields and also do memory statistics to find all the
620	* [BEEP] leaks.
621	*
622	*/
623
624	/**
625	* __alloc_skb - allocate a network buffer
626	* @size: size to allocate
627	* @gfp_mask: allocation mask
628	* @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
629	* instead of head cache and allocate a cloned (child) skb.
630	* If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
631	* allocations in case the data is required for writeback
632	* @node: numa node to allocate memory on
633	*
634	* Allocate a new &sk_buff. The returned buffer has no headroom and a
635	* tail room of at least size bytes. The object has a reference count
636	* of one. The return is the buffer. On a failure the return is %NULL.
637	*
638	* Buffers may only be allocated from interrupts using a @gfp_mask of
639	* %GFP_ATOMIC.
640	*/
641	struct sk_buff __alloc_skb(unsigned* int size, gfp_t gfp_mask,
642	int flags, int node)
643	{
644	struct kmem_cache *cache;
645	struct sk_buff *skb;
646	bool pfmemalloc;
647	u8 *data;
648
649	cache = (flags & SKB_ALLOC_FCLONE)
650	? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache;
651
652	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
653	gfp_mask \|= __GFP_MEMALLOC;
654
655	/ Get the HEAD /
656	if ((flags & (SKB_ALLOC_FCLONE \| SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI &&
657	likely(node == NUMA_NO_NODE \|\| node == numa_mem_id()))
658	skb = napi_skb_cache_get();
659	else
660	skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node);
661	if (unlikely(!skb))
662	return NULL;
663	prefetchw(x: skb);
664
665	/ We do our best to align skb_shared_info on a separate cache*
666	* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
667	* aligned memory blocks, unless SLUB/SLAB debug is enabled.
668	* Both skb->head and skb_shared_info are cache line aligned.
669	*/
670	data = kmalloc_reserve(size: &size, flags: gfp_mask, node, pfmemalloc: &pfmemalloc);
671	if (unlikely(!data))
672	goto nodata;
673	/ kmalloc_size_roundup() might give us more room than requested.*
674	* Put skb_shared_info exactly at the end of allocated zone,
675	* to allow max possible filling before reallocation.
676	*/
677	prefetchw(x: data + SKB_WITH_OVERHEAD(size));
678
679	/*
680	* Only clear those fields we need to clear, not those that we will
681	* actually initialise below. Hence, don't put any more fields after
682	* the tail pointer in struct sk_buff!
683	*/
684	memset(skb, `0`, offsetof(struct sk_buff, tail));
685	__build_skb_around(skb, data, frag_size: size);
686	skb->pfmemalloc = pfmemalloc;
687
688	if (flags & SKB_ALLOC_FCLONE) {
689	struct sk_buff_fclones *fclones;
690
691	fclones = container_of(skb, struct sk_buff_fclones, skb1);
692
693	skb->fclone = SKB_FCLONE_ORIG;
694	refcount_set(r: &fclones->fclone_ref, n: `1`);
695	}
696
697	return skb;
698
699	nodata:
700	kmem_cache_free(s: cache, objp: skb);
701	return NULL;
702	}
703	EXPORT_SYMBOL(__alloc_skb);
704
705	/**
706	* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
707	* @dev: network device to receive on
708	* @len: length to allocate
709	* @gfp_mask: get_free_pages mask, passed to alloc_skb
710	*
711	* Allocate a new &sk_buff and assign it a usage count of one. The
712	* buffer has NET_SKB_PAD headroom built in. Users should allocate
713	* the headroom they think they need without accounting for the
714	* built in space. The built in space is used for optimisations.
715	*
716	* %NULL is returned if there is no free memory.
717	*/
718	struct sk_buff __netdev_alloc_skb(struct* net_device dev, unsigned* int len,
719	gfp_t gfp_mask)
720	{
721	struct page_frag_cache *nc;
722	struct sk_buff *skb;
723	bool pfmemalloc;
724	void *data;
725
726	len += NET_SKB_PAD;
727
728	/ If requested length is either too small or too big,*
729	* we use kmalloc() for skb->head allocation.
730	*/
731	if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) \|\|
732	len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
733	(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
734	skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
735	if (!skb)
736	goto skb_fail;
737	goto skb_success;
738	}
739
740	len = SKB_HEAD_ALIGN(len);
741
742	if (sk_memalloc_socks())
743	gfp_mask \|= __GFP_MEMALLOC;
744
745	if (in_hardirq() \|\| irqs_disabled()) {
746	nc = this_cpu_ptr(&netdev_alloc_cache);
747	data = page_frag_alloc(nc, fragsz: len, gfp_mask);
748	pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
749	} else {
750	local_bh_disable();
751	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
752
753	nc = this_cpu_ptr(&napi_alloc_cache.page);
754	data = page_frag_alloc(nc, fragsz: len, gfp_mask);
755	pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
756
757	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
758	local_bh_enable();
759	}
760
761	if (unlikely(!data))
762	return NULL;
763
764	skb = __build_skb(data, frag_size: len);
765	if (unlikely(!skb)) {
766	skb_free_frag(addr: data);
767	return NULL;
768	}
769
770	if (pfmemalloc)
771	skb->pfmemalloc = `1`;
772	skb->head_frag = `1`;
773
774	skb_success:
775	skb_reserve(skb, NET_SKB_PAD);
776	skb->dev = dev;
777
778	skb_fail:
779	return skb;
780	}
781	EXPORT_SYMBOL(__netdev_alloc_skb);
782
783	/**
784	* napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
785	* @napi: napi instance this buffer was allocated for
786	* @len: length to allocate
787	*
788	* Allocate a new sk_buff for use in NAPI receive. This buffer will
789	* attempt to allocate the head from a special reserved region used
790	* only for NAPI Rx allocation. By doing this we can save several
791	* CPU cycles by avoiding having to disable and re-enable IRQs.
792	*
793	* %NULL is returned if there is no free memory.
794	*/
795	struct sk_buff napi_alloc_skb(struct* napi_struct napi, unsigned* int len)
796	{
797	gfp_t gfp_mask = GFP_ATOMIC \| __GFP_NOWARN;
798	struct napi_alloc_cache *nc;
799	struct sk_buff *skb;
800	bool pfmemalloc;
801	void *data;
802
803	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
804	len += NET_SKB_PAD + NET_IP_ALIGN;
805
806	/ If requested length is either too small or too big,*
807	* we use kmalloc() for skb->head allocation.
808	*/
809	if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) \|\|
810	len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
811	(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
812	skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX \| SKB_ALLOC_NAPI,
813	NUMA_NO_NODE);
814	if (!skb)
815	goto skb_fail;
816	goto skb_success;
817	}
818
819	len = SKB_HEAD_ALIGN(len);
820
821	if (sk_memalloc_socks())
822	gfp_mask \|= __GFP_MEMALLOC;
823
824	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
825	nc = this_cpu_ptr(&napi_alloc_cache);
826
827	data = page_frag_alloc(nc: &nc->page, fragsz: len, gfp_mask);
828	pfmemalloc = page_frag_cache_is_pfmemalloc(nc: &nc->page);
829	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
830
831	if (unlikely(!data))
832	return NULL;
833
834	skb = __napi_build_skb(data, frag_size: len);
835	if (unlikely(!skb)) {
836	skb_free_frag(addr: data);
837	return NULL;
838	}
839
840	if (pfmemalloc)
841	skb->pfmemalloc = `1`;
842	skb->head_frag = `1`;
843
844	skb_success:
845	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
846	skb->dev = napi->dev;
847
848	skb_fail:
849	return skb;
850	}
851	EXPORT_SYMBOL(napi_alloc_skb);
852
853	void skb_add_rx_frag_netmem(struct sk_buff skb, int* i, netmem_ref netmem,
854	int off, int size, unsigned int truesize)
855	{
856	DEBUG_NET_WARN_ON_ONCE(size > truesize);
857
858	skb_fill_netmem_desc(skb, i, netmem, off, size);
859	skb->len += size;
860	skb->data_len += size;
861	skb->truesize += truesize;
862	}
863	EXPORT_SYMBOL(skb_add_rx_frag_netmem);
864
865	void skb_coalesce_rx_frag(struct sk_buff skb, int* i, int size,
866	unsigned int truesize)
867	{
868	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
869
870	DEBUG_NET_WARN_ON_ONCE(size > truesize);
871
872	skb_frag_size_add(frag, delta: size);
873	skb->len += size;
874	skb->data_len += size;
875	skb->truesize += truesize;
876	}
877	EXPORT_SYMBOL(skb_coalesce_rx_frag);
878
879	static void skb_drop_list(struct sk_buff **listp)
880	{
881	kfree_skb_list(segs: *listp);
882	*listp = NULL;
883	}
884
885	static inline void skb_drop_fraglist(struct sk_buff *skb)
886	{
887	skb_drop_list(listp: &skb_shinfo(skb)->frag_list);
888	}
889
890	static void skb_clone_fraglist(struct sk_buff *skb)
891	{
892	struct sk_buff *list;
893
894	skb_walk_frags(skb, list)
895	skb_get(skb: list);
896	}
897
898	int skb_pp_cow_data(struct page_pool pool, struct* sk_buff **pskb,
899	unsigned int headroom)
900	{
901	#if IS_ENABLED(CONFIG_PAGE_POOL)
902	u32 size, truesize, len, max_head_size, off;
903	struct sk_buff skb = pskb, *nskb;
904	int err, i, head_off;
905	void *data;
906
907	/ XDP does not support fraglist so we need to linearize*
908	* the skb.
909	*/
910	if (skb_has_frag_list(skb))
911	return -EOPNOTSUPP;
912
913	max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom);
914	if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE)
915	return -ENOMEM;
916
917	size = min_t(u32, skb->len, max_head_size);
918	truesize = SKB_HEAD_ALIGN(size) + headroom;
919	data = page_pool_dev_alloc_va(pool, size: &truesize);
920	if (!data)
921	return -ENOMEM;
922
923	nskb = napi_build_skb(data, truesize);
924	if (!nskb) {
925	page_pool_free_va(pool, va: data, allow_direct: true);
926	return -ENOMEM;
927	}
928
929	skb_reserve(skb: nskb, len: headroom);
930	skb_copy_header(new: nskb, old: skb);
931	skb_mark_for_recycle(skb: nskb);
932
933	err = skb_copy_bits(skb, offset: `0`, to: nskb->data, len: size);
934	if (err) {
935	consume_skb(skb: nskb);
936	return err;
937	}
938	skb_put(skb: nskb, len: size);
939
940	head_off = skb_headroom(skb: nskb) - skb_headroom(skb);
941	skb_headers_offset_update(skb: nskb, off: head_off);
942
943	off = size;
944	len = skb->len - off;
945	for (i = `0`; i < MAX_SKB_FRAGS && off < skb->len; i++) {
946	struct page *page;
947	u32 page_off;
948
949	size = min_t(u32, len, PAGE_SIZE);
950	truesize = size;
951
952	page = page_pool_dev_alloc(pool, offset: &page_off, size: &truesize);
953	if (!page) {
954	consume_skb(skb: nskb);
955	return -ENOMEM;
956	}
957
958	skb_add_rx_frag(skb: nskb, i, page, off: page_off, size, truesize);
959	err = skb_copy_bits(skb, offset: off, page_address(page) + page_off,
960	len: size);
961	if (err) {
962	consume_skb(skb: nskb);
963	return err;
964	}
965
966	len -= size;
967	off += size;
968	}
969
970	consume_skb(skb);
971	*pskb = nskb;
972
973	return `0`;
974	#else
975	return -EOPNOTSUPP;
976	#endif
977	}
978	EXPORT_SYMBOL(skb_pp_cow_data);
979
980	int skb_cow_data_for_xdp(struct page_pool pool, struct* sk_buff **pskb,
981	const struct bpf_prog *prog)
982	{
983	if (!prog->aux->xdp_has_frags)
984	return -EINVAL;
985
986	return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM);
987	}
988	EXPORT_SYMBOL(skb_cow_data_for_xdp);
989
990	#if IS_ENABLED(CONFIG_PAGE_POOL)
991	bool napi_pp_put_page(netmem_ref netmem)
992	{
993	netmem = netmem_compound_head(netmem);
994
995	if (unlikely(!netmem_is_pp(netmem)))
996	return false;
997
998	page_pool_put_full_netmem(pool: netmem_get_pp(netmem), netmem, allow_direct: false);
999
1000	return true;
1001	}
1002	EXPORT_SYMBOL(napi_pp_put_page);
1003	#endif
1004
1005	static bool skb_pp_recycle(struct sk_buff skb, void* *data)
1006	{
1007	if (!IS_ENABLED(CONFIG_PAGE_POOL) \|\| !skb->pp_recycle)
1008	return false;
1009	return napi_pp_put_page(page_to_netmem(virt_to_page(data)));
1010	}
1011
1012	/**
1013	* skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
1014	* @skb: page pool aware skb
1015	*
1016	* Increase the fragment reference count (pp_ref_count) of a skb. This is
1017	* intended to gain fragment references only for page pool aware skbs,
1018	* i.e. when skb->pp_recycle is true, and not for fragments in a
1019	* non-pp-recycling skb. It has a fallback to increase references on normal
1020	* pages, as page pool aware skbs may also have normal page fragments.
1021	*/
1022	static int skb_pp_frag_ref(struct sk_buff *skb)
1023	{
1024	struct skb_shared_info *shinfo;
1025	netmem_ref head_netmem;
1026	int i;
1027
1028	if (!skb->pp_recycle)
1029	return -EINVAL;
1030
1031	shinfo = skb_shinfo(skb);
1032
1033	for (i = `0`; i < shinfo->nr_frags; i++) {
1034	head_netmem = netmem_compound_head(netmem: shinfo->frags[i].netmem);
1035	if (likely(netmem_is_pp(head_netmem)))
1036	page_pool_ref_netmem(netmem: head_netmem);
1037	else
1038	page_ref_inc(page: netmem_to_page(netmem: head_netmem));
1039	}
1040	return `0`;
1041	}
1042
1043	static void skb_kfree_head(void head, unsigned* int end_offset)
1044	{
1045	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
1046	kmem_cache_free(s: net_hotdata.skb_small_head_cache, objp: head);
1047	else
1048	kfree(objp: head);
1049	}
1050
1051	static void skb_free_head(struct sk_buff *skb)
1052	{
1053	unsigned char *head = skb->head;
1054
1055	if (skb->head_frag) {
1056	if (skb_pp_recycle(skb, data: head))
1057	return;
1058	skb_free_frag(addr: head);
1059	} else {
1060	skb_kfree_head(head, end_offset: skb_end_offset(skb));
1061	}
1062	}
1063
1064	static void skb_release_data(struct sk_buff skb, enum* skb_drop_reason reason)
1065	{
1066	struct skb_shared_info *shinfo = skb_shinfo(skb);
1067	int i;
1068
1069	if (!skb_data_unref(skb, shinfo))
1070	goto exit;
1071
1072	if (skb_zcopy(skb)) {
1073	bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS;
1074
1075	skb_zcopy_clear(skb, zerocopy_success: true);
1076	if (skip_unref)
1077	goto free_head;
1078	}
1079
1080	for (i = `0`; i < shinfo->nr_frags; i++)
1081	__skb_frag_unref(frag: &shinfo->frags[i], recycle: skb->pp_recycle);
1082
1083	free_head:
1084	if (shinfo->frag_list)
1085	kfree_skb_list_reason(segs: shinfo->frag_list, reason);
1086
1087	skb_free_head(skb);
1088	exit:
1089	/ When we clone an SKB we copy the reycling bit. The pp_recycle*
1090	* bit is only set on the head though, so in order to avoid races
1091	* while trying to recycle fragments on __skb_frag_unref() we need
1092	* to make one SKB responsible for triggering the recycle path.
1093	* So disable the recycling bit if an SKB is cloned and we have
1094	* additional references to the fragmented part of the SKB.
1095	* Eventually the last SKB will have the recycling bit set and it's
1096	* dataref set to 0, which will trigger the recycling
1097	*/
1098	skb->pp_recycle = `0`;
1099	}
1100
1101	/*
1102	* Free an skbuff by memory without cleaning the state.
1103	*/
1104	static void kfree_skbmem(struct sk_buff *skb)
1105	{
1106	struct sk_buff_fclones *fclones;
1107
1108	switch (skb->fclone) {
1109	case SKB_FCLONE_UNAVAILABLE:
1110	kmem_cache_free(s: net_hotdata.skbuff_cache, objp: skb);
1111	return;
1112
1113	case SKB_FCLONE_ORIG:
1114	fclones = container_of(skb, struct sk_buff_fclones, skb1);
1115
1116	/ We usually free the clone (TX completion) before original skb*
1117	* This test would have no chance to be true for the clone,
1118	* while here, branch prediction will be good.
1119	*/
1120	if (refcount_read(r: &fclones->fclone_ref) == `1`)
1121	goto fastpath;
1122	break;
1123
1124	default: / SKB_FCLONE_CLONE /
1125	fclones = container_of(skb, struct sk_buff_fclones, skb2);
1126	break;
1127	}
1128	if (!refcount_dec_and_test(r: &fclones->fclone_ref))
1129	return;
1130	fastpath:
1131	kmem_cache_free(s: net_hotdata.skbuff_fclone_cache, objp: fclones);
1132	}
1133
1134	void skb_release_head_state(struct sk_buff *skb)
1135	{
1136	skb_dst_drop(skb);
1137	if (skb->destructor) {
1138	DEBUG_NET_WARN_ON_ONCE(in_hardirq());
1139	skb->destructor(skb);
1140	}
1141	#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1142	nf_conntrack_put(nfct: skb_nfct(skb));
1143	#endif
1144	skb_ext_put(skb);
1145	}
1146
1147	/ Free everything but the sk_buff shell. /
1148	static void skb_release_all(struct sk_buff skb, enum* skb_drop_reason reason)
1149	{
1150	skb_release_head_state(skb);
1151	if (likely(skb->head))
1152	skb_release_data(skb, reason);
1153	}
1154
1155	/**
1156	* __kfree_skb - private function
1157	* @skb: buffer
1158	*
1159	* Free an sk_buff. Release anything attached to the buffer.
1160	* Clean the state. This is an internal helper function. Users should
1161	* always call kfree_skb
1162	*/
1163
1164	void __kfree_skb(struct sk_buff *skb)
1165	{
1166	skb_release_all(skb, reason: SKB_DROP_REASON_NOT_SPECIFIED);
1167	kfree_skbmem(skb);
1168	}
1169	EXPORT_SYMBOL(__kfree_skb);
1170
1171	static __always_inline
1172	bool __sk_skb_reason_drop(struct sock sk, struct* sk_buff *skb,
1173	enum skb_drop_reason reason)
1174	{
1175	if (unlikely(!skb_unref(skb)))
1176	return false;
1177
1178	DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET \|\|
1179	u32_get_bits(reason,
1180	SKB_DROP_REASON_SUBSYS_MASK) >=
1181	SKB_DROP_REASON_SUBSYS_NUM);
1182
1183	if (reason == SKB_CONSUMED)
1184	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1185	else
1186	trace_kfree_skb(skb, location: __builtin_return_address(`0`), reason, rx_sk: sk);
1187	return true;
1188	}
1189
1190	/**
1191	* sk_skb_reason_drop - free an sk_buff with special reason
1192	* @sk: the socket to receive @skb, or NULL if not applicable
1193	* @skb: buffer to free
1194	* @reason: reason why this skb is dropped
1195	*
1196	* Drop a reference to the buffer and free it if the usage count has hit
1197	* zero. Meanwhile, pass the receiving socket and drop reason to
1198	* 'kfree_skb' tracepoint.
1199	*/
1200	void __fix_address
1201	sk_skb_reason_drop(struct sock sk, struct* sk_buff skb, enum* skb_drop_reason reason)
1202	{
1203	if (__sk_skb_reason_drop(sk, skb, reason))
1204	__kfree_skb(skb);
1205	}
1206	EXPORT_SYMBOL(sk_skb_reason_drop);
1207
1208	#define KFREE_SKB_BULK_SIZE 16
1209
1210	struct skb_free_array {
1211	unsigned int skb_count;
1212	void *skb_array[KFREE_SKB_BULK_SIZE];
1213	};
1214
1215	static void kfree_skb_add_bulk(struct sk_buff *skb,
1216	struct skb_free_array *sa,
1217	enum skb_drop_reason reason)
1218	{
1219	/ if SKB is a clone, don't handle this case /
1220	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
1221	__kfree_skb(skb);
1222	return;
1223	}
1224
1225	skb_release_all(skb, reason);
1226	sa->skb_array[sa->skb_count++] = skb;
1227
1228	if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
1229	kmem_cache_free_bulk(s: net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE,
1230	p: sa->skb_array);
1231	sa->skb_count = `0`;
1232	}
1233	}
1234
1235	void __fix_address
1236	kfree_skb_list_reason(struct sk_buff segs, enum* skb_drop_reason reason)
1237	{
1238	struct skb_free_array sa;
1239
1240	sa.skb_count = `0`;
1241
1242	while (segs) {
1243	struct sk_buff *next = segs->next;
1244
1245	if (__sk_skb_reason_drop(NULL, skb: segs, reason)) {
1246	skb_poison_list(skb: segs);
1247	kfree_skb_add_bulk(skb: segs, sa: &sa, reason);
1248	}
1249
1250	segs = next;
1251	}
1252
1253	if (sa.skb_count)
1254	kmem_cache_free_bulk(s: net_hotdata.skbuff_cache, size: sa.skb_count, p: sa.skb_array);
1255	}
1256	EXPORT_SYMBOL(kfree_skb_list_reason);
1257
1258	/ Dump skb information and contents.*
1259	*
1260	* Must only be called from net_ratelimit()-ed paths.
1261	*
1262	* Dumps whole packets if full_pkt, only headers otherwise.
1263	*/
1264	void skb_dump(const char level, const* struct sk_buff *skb, bool full_pkt)
1265	{
1266	struct skb_shared_info *sh = skb_shinfo(skb);
1267	struct net_device *dev = skb->dev;
1268	struct sock *sk = skb->sk;
1269	struct sk_buff *list_skb;
1270	bool has_mac, has_trans;
1271	int headroom, tailroom;
1272	int i, len, seg_len;
1273
1274	if (full_pkt)
1275	len = skb->len;
1276	else
1277	len = min_t(int, skb->len, MAX_HEADER + `128`);
1278
1279	headroom = skb_headroom(skb);
1280	tailroom = skb_tailroom(skb);
1281
1282	has_mac = skb_mac_header_was_set(skb);
1283	has_trans = skb_transport_header_was_set(skb);
1284
1285	printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
1286	"mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n"
1287	"shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
1288	"csum(0x%x start=%u offset=%u ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
1289	"hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n"
1290	"priority=0x%x mark=0x%x alloc_cpu=%u vlan_all=0x%x\n"
1291	"encapsulation=%d inner(proto=0x%04x, mac=%u, net=%u, trans=%u)\n",
1292	level, skb->len, headroom, skb_headlen(skb), tailroom,
1293	has_mac ? skb->mac_header : -`1`,
1294	has_mac ? skb_mac_header_len(skb) : -`1`,
1295	skb->mac_len,
1296	skb->network_header,
1297	has_trans ? skb_network_header_len(skb) : -`1`,
1298	has_trans ? skb->transport_header : -`1`,
1299	sh->tx_flags, sh->nr_frags,
1300	sh->gso_size, sh->gso_type, sh->gso_segs,
1301	skb->csum, skb->csum_start, skb->csum_offset, skb->ip_summed,
1302	skb->csum_complete_sw, skb->csum_valid, skb->csum_level,
1303	skb->hash, skb->sw_hash, skb->l4_hash,
1304	ntohs(skb->protocol), skb->pkt_type, skb->skb_iif,
1305	skb->priority, skb->mark, skb->alloc_cpu, skb->vlan_all,
1306	skb->encapsulation, skb->inner_protocol, skb->inner_mac_header,
1307	skb->inner_network_header, skb->inner_transport_header);
1308
1309	if (dev)
1310	printk("%sdev name=%s feat=%pNF\n",
1311	level, dev->name, &dev->features);
1312	if (sk)
1313	printk("%ssk family=%hu type=%u proto=%u\n",
1314	level, sk->sk_family, sk->sk_type, sk->sk_protocol);
1315
1316	if (full_pkt && headroom)
1317	print_hex_dump(level, prefix_str: "skb headroom: ", prefix_type: DUMP_PREFIX_OFFSET,
1318	rowsize: `16`, groupsize: `1`, buf: skb->head, len: headroom, ascii: false);
1319
1320	seg_len = min_t(int, skb_headlen(skb), len);
1321	if (seg_len)
1322	print_hex_dump(level, prefix_str: "skb linear: ", prefix_type: DUMP_PREFIX_OFFSET,
1323	rowsize: `16`, groupsize: `1`, buf: skb->data, len: seg_len, ascii: false);
1324	len -= seg_len;
1325
1326	if (full_pkt && tailroom)
1327	print_hex_dump(level, prefix_str: "skb tailroom: ", prefix_type: DUMP_PREFIX_OFFSET,
1328	rowsize: `16`, groupsize: `1`, buf: skb_tail_pointer(skb), len: tailroom, ascii: false);
1329
1330	for (i = `0`; len && i < skb_shinfo(skb)->nr_frags; i++) {
1331	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1332	u32 p_off, p_len, copied;
1333	struct page *p;
1334	u8 *vaddr;
1335
1336	if (skb_frag_is_net_iov(frag)) {
1337	printk("%sskb frag %d: not readable\n", level, i);
1338	len -= skb_frag_size(frag);
1339	if (!len)
1340	break;
1341	continue;
1342	}
1343
1344	skb_frag_foreach_page(frag, skb_frag_off(frag),
1345	skb_frag_size(frag), p, p_off, p_len,
1346	copied) {
1347	seg_len = min_t(int, p_len, len);
1348	vaddr = kmap_atomic(page: p);
1349	print_hex_dump(level, prefix_str: "skb frag: ",
1350	prefix_type: DUMP_PREFIX_OFFSET,
1351	rowsize: `16`, groupsize: `1`, buf: vaddr + p_off, len: seg_len, ascii: false);
1352	kunmap_atomic(vaddr);
1353	len -= seg_len;
1354	if (!len)
1355	break;
1356	}
1357	}
1358
1359	if (full_pkt && skb_has_frag_list(skb)) {
1360	printk("skb fraglist:\n");
1361	skb_walk_frags(skb, list_skb)
1362	skb_dump(level, skb: list_skb, full_pkt: true);
1363	}
1364	}
1365	EXPORT_SYMBOL(skb_dump);
1366
1367	/**
1368	* skb_tx_error - report an sk_buff xmit error
1369	* @skb: buffer that triggered an error
1370	*
1371	* Report xmit error if a device callback is tracking this skb.
1372	* skb must be freed afterwards.
1373	*/
1374	void skb_tx_error(struct sk_buff *skb)
1375	{
1376	if (skb) {
1377	skb_zcopy_downgrade_managed(skb);
1378	skb_zcopy_clear(skb, zerocopy_success: true);
1379	}
1380	}
1381	EXPORT_SYMBOL(skb_tx_error);
1382
1383	#ifdef CONFIG_TRACEPOINTS
1384	/**
1385	* consume_skb - free an skbuff
1386	* @skb: buffer to free
1387	*
1388	* Drop a ref to the buffer and free it if the usage count has hit zero
1389	* Functions identically to kfree_skb, but kfree_skb assumes that the frame
1390	* is being dropped after a failure and notes that
1391	*/
1392	void consume_skb(struct sk_buff *skb)
1393	{
1394	if (!skb_unref(skb))
1395	return;
1396
1397	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1398	__kfree_skb(skb);
1399	}
1400	EXPORT_SYMBOL(consume_skb);
1401	#endif
1402
1403	/**
1404	* __consume_stateless_skb - free an skbuff, assuming it is stateless
1405	* @skb: buffer to free
1406	*
1407	* Alike consume_skb(), but this variant assumes that this is the last
1408	* skb reference and all the head states have been already dropped
1409	*/
1410	void __consume_stateless_skb(struct sk_buff *skb)
1411	{
1412	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1413	skb_release_data(skb, reason: SKB_CONSUMED);
1414	kfree_skbmem(skb);
1415	}
1416
1417	static void napi_skb_cache_put(struct sk_buff *skb)
1418	{
1419	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
1420	u32 i;
1421
1422	if (!kasan_mempool_poison_object(ptr: skb))
1423	return;
1424
1425	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
1426	nc->skb_cache[nc->skb_count++] = skb;
1427
1428	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
1429	for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
1430	kasan_mempool_unpoison_object(ptr: nc->skb_cache[i],
1431	size: kmem_cache_size(s: net_hotdata.skbuff_cache));
1432
1433	kmem_cache_free_bulk(s: net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF,
1434	p: nc->skb_cache + NAPI_SKB_CACHE_HALF);
1435	nc->skb_count = NAPI_SKB_CACHE_HALF;
1436	}
1437	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
1438	}
1439
1440	void __napi_kfree_skb(struct sk_buff skb, enum* skb_drop_reason reason)
1441	{
1442	skb_release_all(skb, reason);
1443	napi_skb_cache_put(skb);
1444	}
1445
1446	void napi_skb_free_stolen_head(struct sk_buff *skb)
1447	{
1448	if (unlikely(skb->slow_gro)) {
1449	nf_reset_ct(skb);
1450	skb_dst_drop(skb);
1451	skb_ext_put(skb);
1452	skb_orphan(skb);
1453	skb->slow_gro = `0`;
1454	}
1455	napi_skb_cache_put(skb);
1456	}
1457
1458	void napi_consume_skb(struct sk_buff skb, int* budget)
1459	{
1460	/ Zero budget indicate non-NAPI context called us, like netpoll /
1461	if (unlikely(!budget)) {
1462	dev_consume_skb_any(skb);
1463	return;
1464	}
1465
1466	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
1467
1468	if (!skb_unref(skb))
1469	return;
1470
1471	/ if reaching here SKB is ready to free /
1472	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1473
1474	/ if SKB is a clone, don't handle this case /
1475	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
1476	__kfree_skb(skb);
1477	return;
1478	}
1479
1480	skb_release_all(skb, reason: SKB_CONSUMED);
1481	napi_skb_cache_put(skb);
1482	}
1483	EXPORT_SYMBOL(napi_consume_skb);
1484
1485	/ Make sure a field is contained by headers group /
1486	#define CHECK_SKB_FIELD(field) \
1487	BUILD_BUG_ON(offsetof(struct sk_buff, field) != \
1488	offsetof(struct sk_buff, headers.field)); \
1489
1490	static void __copy_skb_header(struct sk_buff new, const* struct sk_buff *old)
1491	{
1492	new->tstamp = old->tstamp;
1493	/ We do not copy old->sk /
1494	new->dev = old->dev;
1495	memcpy(new->cb, old->cb, sizeof(old->cb));
1496	skb_dst_copy(nskb: new, oskb: old);
1497	__skb_ext_copy(dst: new, src: old);
1498	__nf_copy(dst: new, src: old, copy: false);
1499
1500	/ Note : this field could be in the headers group.*
1501	* It is not yet because we do not want to have a 16 bit hole
1502	*/
1503	new->queue_mapping = old->queue_mapping;
1504
1505	memcpy(&new->headers, &old->headers, sizeof(new->headers));
1506	CHECK_SKB_FIELD(protocol);
1507	CHECK_SKB_FIELD(csum);
1508	CHECK_SKB_FIELD(hash);
1509	CHECK_SKB_FIELD(priority);
1510	CHECK_SKB_FIELD(skb_iif);
1511	CHECK_SKB_FIELD(vlan_proto);
1512	CHECK_SKB_FIELD(vlan_tci);
1513	CHECK_SKB_FIELD(transport_header);
1514	CHECK_SKB_FIELD(network_header);
1515	CHECK_SKB_FIELD(mac_header);
1516	CHECK_SKB_FIELD(inner_protocol);
1517	CHECK_SKB_FIELD(inner_transport_header);
1518	CHECK_SKB_FIELD(inner_network_header);
1519	CHECK_SKB_FIELD(inner_mac_header);
1520	CHECK_SKB_FIELD(mark);
1521	#ifdef CONFIG_NETWORK_SECMARK
1522	CHECK_SKB_FIELD(secmark);
1523	#endif
1524	#ifdef CONFIG_NET_RX_BUSY_POLL
1525	CHECK_SKB_FIELD(napi_id);
1526	#endif
1527	CHECK_SKB_FIELD(alloc_cpu);
1528	#ifdef CONFIG_XPS
1529	CHECK_SKB_FIELD(sender_cpu);
1530	#endif
1531	#ifdef CONFIG_NET_SCHED
1532	CHECK_SKB_FIELD(tc_index);
1533	#endif
1534
1535	}
1536
1537	/*
1538	* You should not add any new code to this function. Add it to
1539	* __copy_skb_header above instead.
1540	*/
1541	static struct sk_buff __skb_clone(struct* sk_buff n, struct* sk_buff *skb)
1542	{
1543	#define C(x) n->x = skb->x
1544
1545	n->next = n->prev = NULL;
1546	n->sk = NULL;
1547	__copy_skb_header(new: n, old: skb);
1548
1549	C(len);
1550	C(data_len);
1551	C(mac_len);
1552	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
1553	n->cloned = `1`;
1554	n->nohdr = `0`;
1555	n->peeked = `0`;
1556	C(pfmemalloc);
1557	C(pp_recycle);
1558	n->destructor = NULL;
1559	C(tail);
1560	C(end);
1561	C(head);
1562	C(head_frag);
1563	C(data);
1564	C(truesize);
1565	refcount_set(r: &n->users, n: `1`);
1566
1567	atomic_inc(v: &(skb_shinfo(skb)->dataref));
1568	skb->cloned = `1`;
1569
1570	return n;
1571	#undef C
1572	}
1573
1574	/**
1575	* alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
1576	* @first: first sk_buff of the msg
1577	*/
1578	struct sk_buff alloc_skb_for_msg(struct* sk_buff *first)
1579	{
1580	struct sk_buff *n;
1581
1582	n = alloc_skb(size: `0`, GFP_ATOMIC);
1583	if (!n)
1584	return NULL;
1585
1586	n->len = first->len;
1587	n->data_len = first->len;
1588	n->truesize = first->truesize;
1589
1590	skb_shinfo(n)->frag_list = first;
1591
1592	__copy_skb_header(new: n, old: first);
1593	n->destructor = NULL;
1594
1595	return n;
1596	}
1597	EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
1598
1599	/**
1600	* skb_morph - morph one skb into another
1601	* @dst: the skb to receive the contents
1602	* @src: the skb to supply the contents
1603	*
1604	* This is identical to skb_clone except that the target skb is
1605	* supplied by the user.
1606	*
1607	* The target skb is returned upon exit.
1608	*/
1609	struct sk_buff skb_morph(struct* sk_buff dst, struct* sk_buff *src)
1610	{
1611	skb_release_all(skb: dst, reason: SKB_CONSUMED);
1612	return __skb_clone(n: dst, skb: src);
1613	}
1614	EXPORT_SYMBOL_GPL(skb_morph);
1615
1616	int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
1617	{
1618	unsigned long max_pg, num_pg, new_pg, old_pg, rlim;
1619	struct user_struct *user;
1620
1621	if (capable(CAP_IPC_LOCK) \|\| !size)
1622	return `0`;
1623
1624	rlim = rlimit(RLIMIT_MEMLOCK);
1625	if (rlim == RLIM_INFINITY)
1626	return `0`;
1627
1628	num_pg = (size >> PAGE_SHIFT) + `2`; / worst case /
1629	max_pg = rlim >> PAGE_SHIFT;
1630	user = mmp->user ? : current_user();
1631
1632	old_pg = atomic_long_read(v: &user->locked_vm);
1633	do {
1634	new_pg = old_pg + num_pg;
1635	if (new_pg > max_pg)
1636	return -ENOBUFS;
1637	} while (!atomic_long_try_cmpxchg(v: &user->locked_vm, old: &old_pg, new: new_pg));
1638
1639	if (!mmp->user) {
1640	mmp->user = get_uid(u: user);
1641	mmp->num_pg = num_pg;
1642	} else {
1643	mmp->num_pg += num_pg;
1644	}
1645
1646	return `0`;
1647	}
1648	EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
1649
1650	void mm_unaccount_pinned_pages(struct mmpin *mmp)
1651	{
1652	if (mmp->user) {
1653	atomic_long_sub(i: mmp->num_pg, v: &mmp->user->locked_vm);
1654	free_uid(mmp->user);
1655	}
1656	}
1657	EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
1658
1659	static struct ubuf_info msg_zerocopy_alloc(struct* sock *sk, size_t size,
1660	bool devmem)
1661	{
1662	struct ubuf_info_msgzc *uarg;
1663	struct sk_buff *skb;
1664
1665	WARN_ON_ONCE(!in_task());
1666
1667	skb = sock_omalloc(sk, size: `0`, GFP_KERNEL);
1668	if (!skb)
1669	return NULL;
1670
1671	BUILD_BUG_ON(sizeof(uarg) > sizeof*(skb->cb));
1672	uarg = (void *)skb->cb;
1673	uarg->mmp.user = NULL;
1674
1675	if (likely(!devmem) && mm_account_pinned_pages(&uarg->mmp, size)) {
1676	kfree_skb(skb);
1677	return NULL;
1678	}
1679
1680	uarg->ubuf.ops = &msg_zerocopy_ubuf_ops;
1681	uarg->id = ((u32)atomic_inc_return(v: &sk->sk_zckey)) - `1`;
1682	uarg->len = `1`;
1683	uarg->bytelen = size;
1684	uarg->zerocopy = `1`;
1685	uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG \| SKBFL_DONT_ORPHAN;
1686	refcount_set(r: &uarg->ubuf.refcnt, n: `1`);
1687	sock_hold(sk);
1688
1689	return &uarg->ubuf;
1690	}
1691
1692	static inline struct sk_buff skb_from_uarg(struct* ubuf_info_msgzc *uarg)
1693	{
1694	return container_of((void )uarg, struct* sk_buff, cb);
1695	}
1696
1697	struct ubuf_info msg_zerocopy_realloc(struct* sock *sk, size_t size,
1698	struct ubuf_info *uarg, bool devmem)
1699	{
1700	if (uarg) {
1701	struct ubuf_info_msgzc *uarg_zc;
1702	const u32 byte_limit = `1` << `19`; / limit to a few TSO /
1703	u32 bytelen, next;
1704
1705	/ there might be non MSG_ZEROCOPY users /
1706	if (uarg->ops != &msg_zerocopy_ubuf_ops)
1707	return NULL;
1708
1709	/ realloc only when socket is locked (TCP, UDP cork),*
1710	* so uarg->len and sk_zckey access is serialized
1711	*/
1712	if (!sock_owned_by_user(sk)) {
1713	WARN_ON_ONCE(`1`);
1714	return NULL;
1715	}
1716
1717	uarg_zc = uarg_to_msgzc(uarg);
1718	bytelen = uarg_zc->bytelen + size;
1719	if (uarg_zc->len == USHRT_MAX - `1` \|\| bytelen > byte_limit) {
1720	/ TCP can create new skb to attach new uarg /
1721	if (sk->sk_type == SOCK_STREAM)
1722	goto new_alloc;
1723	return NULL;
1724	}
1725
1726	next = (u32)atomic_read(v: &sk->sk_zckey);
1727	if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
1728	if (likely(!devmem) &&
1729	mm_account_pinned_pages(&uarg_zc->mmp, size))
1730	return NULL;
1731	uarg_zc->len++;
1732	uarg_zc->bytelen = bytelen;
1733	atomic_set(v: &sk->sk_zckey, i: ++next);
1734
1735	/ no extra ref when appending to datagram (MSG_MORE) /
1736	if (sk->sk_type == SOCK_STREAM)
1737	net_zcopy_get(uarg);
1738
1739	return uarg;
1740	}
1741	}
1742
1743	new_alloc:
1744	return msg_zerocopy_alloc(sk, size, devmem);
1745	}
1746	EXPORT_SYMBOL_GPL(msg_zerocopy_realloc);
1747
1748	static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
1749	{
1750	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
1751	u32 old_lo, old_hi;
1752	u64 sum_len;
1753
1754	old_lo = serr->ee.ee_info;
1755	old_hi = serr->ee.ee_data;
1756	sum_len = old_hi - old_lo + `1ULL` + len;
1757
1758	if (sum_len >= (`1ULL` << `32`))
1759	return false;
1760
1761	if (lo != old_hi + `1`)
1762	return false;
1763
1764	serr->ee.ee_data += len;
1765	return true;
1766	}
1767
1768	static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
1769	{
1770	struct sk_buff tail, skb = skb_from_uarg(uarg);
1771	struct sock_exterr_skb *serr;
1772	struct sock *sk = skb->sk;
1773	struct sk_buff_head *q;
1774	unsigned long flags;
1775	bool is_zerocopy;
1776	u32 lo, hi;
1777	u16 len;
1778
1779	mm_unaccount_pinned_pages(&uarg->mmp);
1780
1781	/ if !len, there was only 1 call, and it was aborted*
1782	* so do not queue a completion notification
1783	*/
1784	if (!uarg->len \|\| sock_flag(sk, flag: SOCK_DEAD))
1785	goto release;
1786
1787	len = uarg->len;
1788	lo = uarg->id;
1789	hi = uarg->id + len - `1`;
1790	is_zerocopy = uarg->zerocopy;
1791
1792	serr = SKB_EXT_ERR(skb);
1793	memset(serr, `0`, sizeof(*serr));
1794	serr->ee.ee_errno = `0`;
1795	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
1796	serr->ee.ee_data = hi;
1797	serr->ee.ee_info = lo;
1798	if (!is_zerocopy)
1799	serr->ee.ee_code \|= SO_EE_CODE_ZEROCOPY_COPIED;
1800
1801	q = &sk->sk_error_queue;
1802	spin_lock_irqsave(&q->lock, flags);
1803	tail = skb_peek_tail(list_: q);
1804	if (!tail \|\| SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY \|\|
1805	!skb_zerocopy_notify_extend(skb: tail, lo, len)) {
1806	__skb_queue_tail(list: q, newsk: skb);
1807	skb = NULL;
1808	}
1809	spin_unlock_irqrestore(lock: &q->lock, flags);
1810
1811	sk_error_report(sk);
1812
1813	release:
1814	consume_skb(skb);
1815	sock_put(sk);
1816	}
1817
1818	static void msg_zerocopy_complete(struct sk_buff skb, struct* ubuf_info *uarg,
1819	bool success)
1820	{
1821	struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
1822
1823	uarg_zc->zerocopy = uarg_zc->zerocopy & success;
1824
1825	if (refcount_dec_and_test(r: &uarg->refcnt))
1826	__msg_zerocopy_callback(uarg: uarg_zc);
1827	}
1828
1829	void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
1830	{
1831	struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;
1832
1833	atomic_dec(v: &sk->sk_zckey);
1834	uarg_to_msgzc(uarg)->len--;
1835
1836	if (have_uref)
1837	msg_zerocopy_complete(NULL, uarg, success: true);
1838	}
1839	EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
1840
1841	const struct ubuf_info_ops msg_zerocopy_ubuf_ops = {
1842	.complete = msg_zerocopy_complete,
1843	};
1844	EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops);
1845
1846	int skb_zerocopy_iter_stream(struct sock sk, struct* sk_buff *skb,
1847	struct msghdr msg, int* len,
1848	struct ubuf_info *uarg,
1849	struct net_devmem_dmabuf_binding *binding)
1850	{
1851	int err, orig_len = skb->len;
1852
1853	if (uarg->ops->link_skb) {
1854	err = uarg->ops->link_skb(skb, uarg);
1855	if (err)
1856	return err;
1857	} else {
1858	struct ubuf_info *orig_uarg = skb_zcopy(skb);
1859
1860	/ An skb can only point to one uarg. This edge case happens*
1861	* when TCP appends to an skb, but zerocopy_realloc triggered
1862	* a new alloc.
1863	*/
1864	if (orig_uarg && uarg != orig_uarg)
1865	return -EEXIST;
1866	}
1867
1868	err = __zerocopy_sg_from_iter(msg, sk, skb, from: &msg->msg_iter, length: len,
1869	binding);
1870	if (err == -EFAULT \|\| (err == -EMSGSIZE && skb->len == orig_len)) {
1871	struct sock *save_sk = skb->sk;
1872
1873	/ Streams do not free skb on error. Reset to prev state. /
1874	iov_iter_revert(i: &msg->msg_iter, bytes: skb->len - orig_len);
1875	skb->sk = sk;
1876	___pskb_trim(skb, len: orig_len);
1877	skb->sk = save_sk;
1878	return err;
1879	}
1880
1881	skb_zcopy_set(skb, uarg, NULL);
1882	return skb->len - orig_len;
1883	}
1884	EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
1885
1886	void __skb_zcopy_downgrade_managed(struct sk_buff *skb)
1887	{
1888	int i;
1889
1890	skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS;
1891	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
1892	skb_frag_ref(skb, f: i);
1893	}
1894	EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed);
1895
1896	static int skb_zerocopy_clone(struct sk_buff nskb, struct* sk_buff *orig,
1897	gfp_t gfp_mask)
1898	{
1899	if (skb_zcopy(skb: orig)) {
1900	if (skb_zcopy(skb: nskb)) {
1901	/ !gfp_mask callers are verified to !skb_zcopy(nskb) /
1902	if (!gfp_mask) {
1903	WARN_ON_ONCE(`1`);
1904	return -ENOMEM;
1905	}
1906	if (skb_uarg(nskb) == skb_uarg(orig))
1907	return `0`;
1908	if (skb_copy_ubufs(skb: nskb, GFP_ATOMIC))
1909	return -EIO;
1910	}
1911	skb_zcopy_set(skb: nskb, skb_uarg(orig), NULL);
1912	}
1913	return `0`;
1914	}
1915
1916	/**
1917	* skb_copy_ubufs - copy userspace skb frags buffers to kernel
1918	* @skb: the skb to modify
1919	* @gfp_mask: allocation priority
1920	*
1921	* This must be called on skb with SKBFL_ZEROCOPY_ENABLE.
1922	* It will copy all frags into kernel and drop the reference
1923	* to userspace pages.
1924	*
1925	* If this function is called from an interrupt gfp_mask() must be
1926	* %GFP_ATOMIC.
1927	*
1928	* Returns 0 on success or a negative error code on failure
1929	* to allocate kernel memory to copy to.
1930	*/
1931	int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1932	{
1933	int num_frags = skb_shinfo(skb)->nr_frags;
1934	struct page page, head = NULL;
1935	int i, order, psize, new_frags;
1936	u32 d_off;
1937
1938	if (skb_shared(skb) \|\| skb_unclone(skb, pri: gfp_mask))
1939	return -EINVAL;
1940
1941	if (!skb_frags_readable(skb))
1942	return -EFAULT;
1943
1944	if (!num_frags)
1945	goto release;
1946
1947	/ We might have to allocate high order pages, so compute what minimum*
1948	* page order is needed.
1949	*/
1950	order = `0`;
1951	while ((PAGE_SIZE << order) * MAX_SKB_FRAGS < __skb_pagelen(skb))
1952	order++;
1953	psize = (PAGE_SIZE << order);
1954
1955	new_frags = (__skb_pagelen(skb) + psize - `1`) >> (PAGE_SHIFT + order);
1956	for (i = `0`; i < new_frags; i++) {
1957	page = alloc_pages(gfp_mask \| __GFP_COMP, order);
1958	if (!page) {
1959	while (head) {
1960	struct page next = (struct* page *)page_private(head);
1961	put_page(page: head);
1962	head = next;
1963	}
1964	return -ENOMEM;
1965	}
1966	set_page_private(page, private: (unsigned long)head);
1967	head = page;
1968	}
1969
1970	page = head;
1971	d_off = `0`;
1972	for (i = `0`; i < num_frags; i++) {
1973	skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1974	u32 p_off, p_len, copied;
1975	struct page *p;
1976	u8 *vaddr;
1977
1978	skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
1979	p, p_off, p_len, copied) {
1980	u32 copy, done = `0`;
1981	vaddr = kmap_atomic(page: p);
1982
1983	while (done < p_len) {
1984	if (d_off == psize) {
1985	d_off = `0`;
1986	page = (struct page *)page_private(page);
1987	}
1988	copy = min_t(u32, psize - d_off, p_len - done);
1989	memcpy(page_address(page) + d_off,
1990	vaddr + p_off + done, copy);
1991	done += copy;
1992	d_off += copy;
1993	}
1994	kunmap_atomic(vaddr);
1995	}
1996	}
1997
1998	/ skb frags release userspace buffers /
1999	for (i = `0`; i < num_frags; i++)
2000	skb_frag_unref(skb, f: i);
2001
2002	/ skb frags point to kernel buffers /
2003	for (i = `0`; i < new_frags - `1`; i++) {
2004	__skb_fill_netmem_desc(skb, i, netmem: page_to_netmem(page: head), off: `0`, size: psize);
2005	head = (struct page *)page_private(head);
2006	}
2007	__skb_fill_netmem_desc(skb, i: new_frags - `1`, netmem: page_to_netmem(page: head), off: `0`,
2008	size: d_off);
2009	skb_shinfo(skb)->nr_frags = new_frags;
2010
2011	release:
2012	skb_zcopy_clear(skb, zerocopy_success: false);
2013	return `0`;
2014	}
2015	EXPORT_SYMBOL_GPL(skb_copy_ubufs);
2016
2017	/**
2018	* skb_clone - duplicate an sk_buff
2019	* @skb: buffer to clone
2020	* @gfp_mask: allocation priority
2021	*
2022	* Duplicate an &sk_buff. The new one is not owned by a socket. Both
2023	* copies share the same packet data but not structure. The new
2024	* buffer has a reference count of 1. If the allocation fails the
2025	* function returns %NULL otherwise the new buffer is returned.
2026	*
2027	* If this function is called from an interrupt gfp_mask() must be
2028	* %GFP_ATOMIC.
2029	*/
2030
2031	struct sk_buff skb_clone(struct* sk_buff *skb, gfp_t gfp_mask)
2032	{
2033	struct sk_buff_fclones *fclones = container_of(skb,
2034	struct sk_buff_fclones,
2035	skb1);
2036	struct sk_buff *n;
2037
2038	if (skb_orphan_frags(skb, gfp_mask))
2039	return NULL;
2040
2041	if (skb->fclone == SKB_FCLONE_ORIG &&
2042	refcount_read(r: &fclones->fclone_ref) == `1`) {
2043	n = &fclones->skb2;
2044	refcount_set(r: &fclones->fclone_ref, n: `2`);
2045	n->fclone = SKB_FCLONE_CLONE;
2046	} else {
2047	if (skb_pfmemalloc(skb))
2048	gfp_mask \|= __GFP_MEMALLOC;
2049
2050	n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask);
2051	if (!n)
2052	return NULL;
2053
2054	n->fclone = SKB_FCLONE_UNAVAILABLE;
2055	}
2056
2057	return __skb_clone(n, skb);
2058	}
2059	EXPORT_SYMBOL(skb_clone);
2060
2061	void skb_headers_offset_update(struct sk_buff skb, int* off)
2062	{
2063	/ Only adjust this if it actually is csum_start rather than csum /
2064	if (skb->ip_summed == CHECKSUM_PARTIAL)
2065	skb->csum_start += off;
2066	/ {transport,network,mac}_header and tail are relative to skb->head /
2067	skb->transport_header += off;
2068	skb->network_header += off;
2069	if (skb_mac_header_was_set(skb))
2070	skb->mac_header += off;
2071	skb->inner_transport_header += off;
2072	skb->inner_network_header += off;
2073	skb->inner_mac_header += off;
2074	}
2075	EXPORT_SYMBOL(skb_headers_offset_update);
2076
2077	void skb_copy_header(struct sk_buff new, const* struct sk_buff *old)
2078	{
2079	__copy_skb_header(new, old);
2080
2081	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
2082	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
2083	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
2084	}
2085	EXPORT_SYMBOL(skb_copy_header);
2086
2087	static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
2088	{
2089	if (skb_pfmemalloc(skb))
2090	return SKB_ALLOC_RX;
2091	return `0`;
2092	}
2093
2094	/**
2095	* skb_copy - create private copy of an sk_buff
2096	* @skb: buffer to copy
2097	* @gfp_mask: allocation priority
2098	*
2099	* Make a copy of both an &sk_buff and its data. This is used when the
2100	* caller wishes to modify the data and needs a private copy of the
2101	* data to alter. Returns %NULL on failure or the pointer to the buffer
2102	* on success. The returned buffer has a reference count of 1.
2103	*
2104	* As by-product this function converts non-linear &sk_buff to linear
2105	* one, so that &sk_buff becomes completely private and caller is allowed
2106	* to modify all the data of returned buffer. This means that this
2107	* function is not recommended for use in circumstances when only
2108	* header is going to be modified. Use pskb_copy() instead.
2109	*/
2110
2111	struct sk_buff skb_copy(const* struct sk_buff *skb, gfp_t gfp_mask)
2112	{
2113	struct sk_buff *n;
2114	unsigned int size;
2115	int headerlen;
2116
2117	if (!skb_frags_readable(skb))
2118	return NULL;
2119
2120	if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
2121	return NULL;
2122
2123	headerlen = skb_headroom(skb);
2124	size = skb_end_offset(skb) + skb->data_len;
2125	n = __alloc_skb(size, gfp_mask,
2126	skb_alloc_rx_flag(skb), NUMA_NO_NODE);
2127	if (!n)
2128	return NULL;
2129
2130	/ Set the data pointer /
2131	skb_reserve(skb: n, len: headerlen);
2132	/ Set the tail pointer and length /
2133	skb_put(skb: n, len: skb->len);
2134
2135	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
2136
2137	skb_copy_header(n, skb);
2138	return n;
2139	}
2140	EXPORT_SYMBOL(skb_copy);
2141
2142	/**
2143	* __pskb_copy_fclone - create copy of an sk_buff with private head.
2144	* @skb: buffer to copy
2145	* @headroom: headroom of new skb
2146	* @gfp_mask: allocation priority
2147	* @fclone: if true allocate the copy of the skb from the fclone
2148	* cache instead of the head cache; it is recommended to set this
2149	* to true for the cases where the copy will likely be cloned
2150	*
2151	* Make a copy of both an &sk_buff and part of its data, located
2152	* in header. Fragmented data remain shared. This is used when
2153	* the caller wishes to modify only header of &sk_buff and needs
2154	* private copy of the header to alter. Returns %NULL on failure
2155	* or the pointer to the buffer on success.
2156	* The returned buffer has a reference count of 1.
2157	*/
2158
2159	struct sk_buff __pskb_copy_fclone(struct* sk_buff skb, int* headroom,
2160	gfp_t gfp_mask, bool fclone)
2161	{
2162	unsigned int size = skb_headlen(skb) + headroom;
2163	int flags = skb_alloc_rx_flag(skb) \| (fclone ? SKB_ALLOC_FCLONE : `0`);
2164	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
2165
2166	if (!n)
2167	goto out;
2168
2169	/ Set the data pointer /
2170	skb_reserve(skb: n, len: headroom);
2171	/ Set the tail pointer and length /
2172	skb_put(skb: n, len: skb_headlen(skb));
2173	/ Copy the bytes /
2174	skb_copy_from_linear_data(skb, to: n->data, len: n->len);
2175
2176	n->truesize += skb->data_len;
2177	n->data_len = skb->data_len;
2178	n->len = skb->len;
2179
2180	if (skb_shinfo(skb)->nr_frags) {
2181	int i;
2182
2183	if (skb_orphan_frags(skb, gfp_mask) \|\|
2184	skb_zerocopy_clone(nskb: n, orig: skb, gfp_mask)) {
2185	kfree_skb(skb: n);
2186	n = NULL;
2187	goto out;
2188	}
2189	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2190	skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
2191	skb_frag_ref(skb, f: i);
2192	}
2193	skb_shinfo(n)->nr_frags = i;
2194	}
2195
2196	if (skb_has_frag_list(skb)) {
2197	skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
2198	skb_clone_fraglist(skb: n);
2199	}
2200
2201	skb_copy_header(n, skb);
2202	out:
2203	return n;
2204	}
2205	EXPORT_SYMBOL(__pskb_copy_fclone);
2206
2207	/**
2208	* pskb_expand_head - reallocate header of &sk_buff
2209	* @skb: buffer to reallocate
2210	* @nhead: room to add at head
2211	* @ntail: room to add at tail
2212	* @gfp_mask: allocation priority
2213	*
2214	* Expands (or creates identical copy, if @nhead and @ntail are zero)
2215	* header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
2216	* reference count of 1. Returns zero in the case of success or error,
2217	* if expansion failed. In the last case, &sk_buff is not changed.
2218	*
2219	* All the pointers pointing into skb header may change and must be
2220	* reloaded after call to this function.
2221	*/
2222
2223	int pskb_expand_head(struct sk_buff skb, int* nhead, int ntail,
2224	gfp_t gfp_mask)
2225	{
2226	unsigned int osize = skb_end_offset(skb);
2227	unsigned int size = osize + nhead + ntail;
2228	long off;
2229	u8 *data;
2230	int i;
2231
2232	BUG_ON(nhead < `0`);
2233
2234	BUG_ON(skb_shared(skb));
2235
2236	skb_zcopy_downgrade_managed(skb);
2237
2238	if (skb_pfmemalloc(skb))
2239	gfp_mask \|= __GFP_MEMALLOC;
2240
2241	data = kmalloc_reserve(size: &size, flags: gfp_mask, NUMA_NO_NODE, NULL);
2242	if (!data)
2243	goto nodata;
2244	size = SKB_WITH_OVERHEAD(size);
2245
2246	/ Copy only real data... and, alas, header. This should be*
2247	* optimized for the cases when header is void.
2248	*/
2249	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
2250
2251	memcpy((struct skb_shared_info *)(data + size),
2252	skb_shinfo(skb),
2253	offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
2254
2255	/*
2256	* if shinfo is shared we must drop the old head gracefully, but if it
2257	* is not we can just drop the old head and let the existing refcount
2258	* be since all we did is relocate the values
2259	*/
2260	if (skb_cloned(skb)) {
2261	if (skb_orphan_frags(skb, gfp_mask))
2262	goto nofrags;
2263	if (skb_zcopy(skb))
2264	refcount_inc(r: &skb_uarg(skb)->refcnt);
2265	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
2266	skb_frag_ref(skb, f: i);
2267
2268	if (skb_has_frag_list(skb))
2269	skb_clone_fraglist(skb);
2270
2271	skb_release_data(skb, reason: SKB_CONSUMED);
2272	} else {
2273	skb_free_head(skb);
2274	}
2275	off = (data + nhead) - skb->head;
2276
2277	skb->head = data;
2278	skb->head_frag = `0`;
2279	skb->data += off;
2280
2281	skb_set_end_offset(skb, offset: size);
2282	#ifdef NET_SKBUFF_DATA_USES_OFFSET
2283	off = nhead;
2284	#endif
2285	skb->tail += off;
2286	skb_headers_offset_update(skb, nhead);
2287	skb->cloned = `0`;
2288	skb->hdr_len = `0`;
2289	skb->nohdr = `0`;
2290	atomic_set(v: &skb_shinfo(skb)->dataref, i: `1`);
2291
2292	skb_metadata_clear(skb);
2293
2294	/ It is not generally safe to change skb->truesize.*
2295	* For the moment, we really care of rx path, or
2296	* when skb is orphaned (not attached to a socket).
2297	*/
2298	if (!skb->sk \|\| skb->destructor == sock_edemux)
2299	skb->truesize += size - osize;
2300
2301	return `0`;
2302
2303	nofrags:
2304	skb_kfree_head(head: data, end_offset: size);
2305	nodata:
2306	return -ENOMEM;
2307	}
2308	EXPORT_SYMBOL(pskb_expand_head);
2309
2310	/ Make private copy of skb with writable head and some headroom /
2311
2312	struct sk_buff skb_realloc_headroom(struct* sk_buff skb, unsigned* int headroom)
2313	{
2314	struct sk_buff *skb2;
2315	int delta = headroom - skb_headroom(skb);
2316
2317	if (delta <= `0`)
2318	skb2 = pskb_copy(skb, GFP_ATOMIC);
2319	else {
2320	skb2 = skb_clone(skb, GFP_ATOMIC);
2321	if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), `0`,
2322	GFP_ATOMIC)) {
2323	kfree_skb(skb: skb2);
2324	skb2 = NULL;
2325	}
2326	}
2327	return skb2;
2328	}
2329	EXPORT_SYMBOL(skb_realloc_headroom);
2330
2331	/ Note: We plan to rework this in linux-6.4 /
2332	int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
2333	{
2334	unsigned int saved_end_offset, saved_truesize;
2335	struct skb_shared_info *shinfo;
2336	int res;
2337
2338	saved_end_offset = skb_end_offset(skb);
2339	saved_truesize = skb->truesize;
2340
2341	res = pskb_expand_head(skb, `0`, `0`, pri);
2342	if (res)
2343	return res;
2344
2345	skb->truesize = saved_truesize;
2346
2347	if (likely(skb_end_offset(skb) == saved_end_offset))
2348	return `0`;
2349
2350	/ We can not change skb->end if the original or new value*
2351	* is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head().
2352	*/
2353	if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM \|\|
2354	skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) {
2355	/ We think this path should not be taken.*
2356	* Add a temporary trace to warn us just in case.
2357	*/
2358	pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n",
2359	saved_end_offset, skb_end_offset(skb));
2360	WARN_ON_ONCE(`1`);
2361	return `0`;
2362	}
2363
2364	shinfo = skb_shinfo(skb);
2365
2366	/ We are about to change back skb->end,*
2367	* we need to move skb_shinfo() to its new location.
2368	*/
2369	memmove(skb->head + saved_end_offset,
2370	shinfo,
2371	offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
2372
2373	skb_set_end_offset(skb, offset: saved_end_offset);
2374
2375	return `0`;
2376	}
2377
2378	/**
2379	* skb_expand_head - reallocate header of &sk_buff
2380	* @skb: buffer to reallocate
2381	* @headroom: needed headroom
2382	*
2383	* Unlike skb_realloc_headroom, this one does not allocate a new skb
2384	* if possible; copies skb->sk to new skb as needed
2385	* and frees original skb in case of failures.
2386	*
2387	* It expect increased headroom and generates warning otherwise.
2388	*/
2389
2390	struct sk_buff skb_expand_head(struct* sk_buff skb, unsigned* int headroom)
2391	{
2392	int delta = headroom - skb_headroom(skb);
2393	int osize = skb_end_offset(skb);
2394	struct sock *sk = skb->sk;
2395
2396	if (WARN_ONCE(delta <= `0`,
2397	"%s is expecting an increase in the headroom", __func__))
2398	return skb;
2399
2400	delta = SKB_DATA_ALIGN(delta);
2401	/ pskb_expand_head() might crash, if skb is shared. /
2402	if (skb_shared(skb) \|\| !is_skb_wmem(skb)) {
2403	struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
2404
2405	if (unlikely(!nskb))
2406	goto fail;
2407
2408	if (sk)
2409	skb_set_owner_w(skb: nskb, sk);
2410	consume_skb(skb);
2411	skb = nskb;
2412	}
2413	if (pskb_expand_head(skb, delta, `0`, GFP_ATOMIC))
2414	goto fail;
2415
2416	if (sk && is_skb_wmem(skb)) {
2417	delta = skb_end_offset(skb) - osize;
2418	refcount_add(i: delta, r: &sk->sk_wmem_alloc);
2419	skb->truesize += delta;
2420	}
2421	return skb;
2422
2423	fail:
2424	kfree_skb(skb);
2425	return NULL;
2426	}
2427	EXPORT_SYMBOL(skb_expand_head);
2428
2429	/**
2430	* skb_copy_expand - copy and expand sk_buff
2431	* @skb: buffer to copy
2432	* @newheadroom: new free bytes at head
2433	* @newtailroom: new free bytes at tail
2434	* @gfp_mask: allocation priority
2435	*
2436	* Make a copy of both an &sk_buff and its data and while doing so
2437	* allocate additional space.
2438	*
2439	* This is used when the caller wishes to modify the data and needs a
2440	* private copy of the data to alter as well as more space for new fields.
2441	* Returns %NULL on failure or the pointer to the buffer
2442	* on success. The returned buffer has a reference count of 1.
2443	*
2444	* You must pass %GFP_ATOMIC as the allocation priority if this function
2445	* is called from an interrupt.
2446	*/
2447	struct sk_buff skb_copy_expand(const* struct sk_buff *skb,
2448	int newheadroom, int newtailroom,
2449	gfp_t gfp_mask)
2450	{
2451	/*
2452	* Allocate the copy buffer
2453	*/
2454	int head_copy_len, head_copy_off;
2455	struct sk_buff *n;
2456	int oldheadroom;
2457
2458	if (!skb_frags_readable(skb))
2459	return NULL;
2460
2461	if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
2462	return NULL;
2463
2464	oldheadroom = skb_headroom(skb);
2465	n = __alloc_skb(newheadroom + skb->len + newtailroom,
2466	gfp_mask, skb_alloc_rx_flag(skb),
2467	NUMA_NO_NODE);
2468	if (!n)
2469	return NULL;
2470
2471	skb_reserve(skb: n, len: newheadroom);
2472
2473	/ Set the tail pointer and length /
2474	skb_put(skb: n, len: skb->len);
2475
2476	head_copy_len = oldheadroom;
2477	head_copy_off = `0`;
2478	if (newheadroom <= head_copy_len)
2479	head_copy_len = newheadroom;
2480	else
2481	head_copy_off = newheadroom - head_copy_len;
2482
2483	/ Copy the linear header and data. /
2484	BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
2485	skb->len + head_copy_len));
2486
2487	skb_copy_header(n, skb);
2488
2489	skb_headers_offset_update(n, newheadroom - oldheadroom);
2490
2491	return n;
2492	}
2493	EXPORT_SYMBOL(skb_copy_expand);
2494
2495	/**
2496	* __skb_pad - zero pad the tail of an skb
2497	* @skb: buffer to pad
2498	* @pad: space to pad
2499	* @free_on_error: free buffer on error
2500	*
2501	* Ensure that a buffer is followed by a padding area that is zero
2502	* filled. Used by network drivers which may DMA or transfer data
2503	* beyond the buffer end onto the wire.
2504	*
2505	* May return error in out of memory cases. The skb is freed on error
2506	* if @free_on_error is true.
2507	*/
2508
2509	int __skb_pad(struct sk_buff skb, int* pad, bool free_on_error)
2510	{
2511	int err;
2512	int ntail;
2513
2514	/ If the skbuff is non linear tailroom is always zero.. /
2515	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
2516	memset(skb->data+skb->len, `0`, pad);
2517	return `0`;
2518	}
2519
2520	ntail = skb->data_len + pad - (skb->end - skb->tail);
2521	if (likely(skb_cloned(skb) \|\| ntail > `0`)) {
2522	err = pskb_expand_head(skb, `0`, ntail, GFP_ATOMIC);
2523	if (unlikely(err))
2524	goto free_skb;
2525	}
2526
2527	/ FIXME: The use of this function with non-linear skb's really needs*
2528	* to be audited.
2529	*/
2530	err = skb_linearize(skb);
2531	if (unlikely(err))
2532	goto free_skb;
2533
2534	memset(skb->data + skb->len, `0`, pad);
2535	return `0`;
2536
2537	free_skb:
2538	if (free_on_error)
2539	kfree_skb(skb);
2540	return err;
2541	}
2542	EXPORT_SYMBOL(__skb_pad);
2543
2544	/**
2545	* pskb_put - add data to the tail of a potentially fragmented buffer
2546	* @skb: start of the buffer to use
2547	* @tail: tail fragment of the buffer to use
2548	* @len: amount of data to add
2549	*
2550	* This function extends the used data area of the potentially
2551	* fragmented buffer. @tail must be the last fragment of @skb -- or
2552	* @skb itself. If this would exceed the total buffer size the kernel
2553	* will panic. A pointer to the first byte of the extra data is
2554	* returned.
2555	*/
2556
2557	void pskb_put(struct* sk_buff skb, struct* sk_buff tail, int* len)
2558	{
2559	if (tail != skb) {
2560	skb->data_len += len;
2561	skb->len += len;
2562	}
2563	return skb_put(skb: tail, len);
2564	}
2565	EXPORT_SYMBOL_GPL(pskb_put);
2566
2567	/**
2568	* skb_put - add data to a buffer
2569	* @skb: buffer to use
2570	* @len: amount of data to add
2571	*
2572	* This function extends the used data area of the buffer. If this would
2573	* exceed the total buffer size the kernel will panic. A pointer to the
2574	* first byte of the extra data is returned.
2575	*/
2576	void skb_put(struct* sk_buff skb, unsigned* int len)
2577	{
2578	void *tmp = skb_tail_pointer(skb);
2579	SKB_LINEAR_ASSERT(skb);
2580	skb->tail += len;
2581	skb->len += len;
2582	if (unlikely(skb->tail > skb->end))
2583	skb_over_panic(skb, sz: len, addr: __builtin_return_address(`0`));
2584	return tmp;
2585	}
2586	EXPORT_SYMBOL(skb_put);
2587
2588	/**
2589	* skb_push - add data to the start of a buffer
2590	* @skb: buffer to use
2591	* @len: amount of data to add
2592	*
2593	* This function extends the used data area of the buffer at the buffer
2594	* start. If this would exceed the total buffer headroom the kernel will
2595	* panic. A pointer to the first byte of the extra data is returned.
2596	*/
2597	void skb_push(struct* sk_buff skb, unsigned* int len)
2598	{
2599	skb->data -= len;
2600	skb->len += len;
2601	if (unlikely(skb->data < skb->head))
2602	skb_under_panic(skb, sz: len, addr: __builtin_return_address(`0`));
2603	return skb->data;
2604	}
2605	EXPORT_SYMBOL(skb_push);
2606
2607	/**
2608	* skb_pull - remove data from the start of a buffer
2609	* @skb: buffer to use
2610	* @len: amount of data to remove
2611	*
2612	* This function removes data from the start of a buffer, returning
2613	* the memory to the headroom. A pointer to the next data in the buffer
2614	* is returned. Once the data has been pulled future pushes will overwrite
2615	* the old data.
2616	*/
2617	void skb_pull(struct* sk_buff skb, unsigned* int len)
2618	{
2619	return skb_pull_inline(skb, len);
2620	}
2621	EXPORT_SYMBOL(skb_pull);
2622
2623	/**
2624	* skb_pull_data - remove data from the start of a buffer returning its
2625	* original position.
2626	* @skb: buffer to use
2627	* @len: amount of data to remove
2628	*
2629	* This function removes data from the start of a buffer, returning
2630	* the memory to the headroom. A pointer to the original data in the buffer
2631	* is returned after checking if there is enough data to pull. Once the
2632	* data has been pulled future pushes will overwrite the old data.
2633	*/
2634	void skb_pull_data(struct* sk_buff *skb, size_t len)
2635	{
2636	void *data = skb->data;
2637
2638	if (skb->len < len)
2639	return NULL;
2640
2641	skb_pull(skb, len);
2642
2643	return data;
2644	}
2645	EXPORT_SYMBOL(skb_pull_data);
2646
2647	/**
2648	* skb_trim - remove end from a buffer
2649	* @skb: buffer to alter
2650	* @len: new length
2651	*
2652	* Cut the length of a buffer down by removing data from the tail. If
2653	* the buffer is already under the length specified it is not modified.
2654	* The skb must be linear.
2655	*/
2656	void skb_trim(struct sk_buff skb, unsigned* int len)
2657	{
2658	if (skb->len > len)
2659	__skb_trim(skb, len);
2660	}
2661	EXPORT_SYMBOL(skb_trim);
2662
2663	/ Trims skb to length len. It can change skb pointers.*
2664	*/
2665
2666	int ___pskb_trim(struct sk_buff skb, unsigned* int len)
2667	{
2668	struct sk_buff **fragp;
2669	struct sk_buff *frag;
2670	int offset = skb_headlen(skb);
2671	int nfrags = skb_shinfo(skb)->nr_frags;
2672	int i;
2673	int err;
2674
2675	if (skb_cloned(skb) &&
2676	unlikely((err = pskb_expand_head(skb, `0`, `0`, GFP_ATOMIC))))
2677	return err;
2678
2679	i = `0`;
2680	if (offset >= len)
2681	goto drop_pages;
2682
2683	for (; i < nfrags; i++) {
2684	int end = offset + skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
2685
2686	if (end < len) {
2687	offset = end;
2688	continue;
2689	}
2690
2691	skb_frag_size_set(frag: &skb_shinfo(skb)->frags[i++], size: len - offset);
2692
2693	drop_pages:
2694	skb_shinfo(skb)->nr_frags = i;
2695
2696	for (; i < nfrags; i++)
2697	skb_frag_unref(skb, f: i);
2698
2699	if (skb_has_frag_list(skb))
2700	skb_drop_fraglist(skb);
2701	goto done;
2702	}
2703
2704	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
2705	fragp = &frag->next) {
2706	int end = offset + frag->len;
2707
2708	if (skb_shared(skb: frag)) {
2709	struct sk_buff *nfrag;
2710
2711	nfrag = skb_clone(frag, GFP_ATOMIC);
2712	if (unlikely(!nfrag))
2713	return -ENOMEM;
2714
2715	nfrag->next = frag->next;
2716	consume_skb(frag);
2717	frag = nfrag;
2718	*fragp = frag;
2719	}
2720
2721	if (end < len) {
2722	offset = end;
2723	continue;
2724	}
2725
2726	if (end > len &&
2727	unlikely((err = pskb_trim(frag, len - offset))))
2728	return err;
2729
2730	if (frag->next)
2731	skb_drop_list(listp: &frag->next);
2732	break;
2733	}
2734
2735	done:
2736	if (len > skb_headlen(skb)) {
2737	skb->data_len -= skb->len - len;
2738	skb->len = len;
2739	} else {
2740	skb->len = len;
2741	skb->data_len = `0`;
2742	skb_set_tail_pointer(skb, offset: len);
2743	}
2744
2745	if (!skb->sk \|\| skb->destructor == sock_edemux)
2746	skb_condense(skb);
2747	return `0`;
2748	}
2749	EXPORT_SYMBOL(___pskb_trim);
2750
2751	/ Note : use pskb_trim_rcsum() instead of calling this directly*
2752	*/
2753	int pskb_trim_rcsum_slow(struct sk_buff skb, unsigned* int len)
2754	{
2755	if (skb->ip_summed == CHECKSUM_COMPLETE) {
2756	int delta = skb->len - len;
2757
2758	skb->csum = csum_block_sub(csum: skb->csum,
2759	csum2: skb_checksum(skb, offset: len, len: delta, csum: `0`),
2760	offset: len);
2761	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
2762	int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
2763	int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
2764
2765	if (offset + sizeof(__sum16) > hdlen)
2766	return -EINVAL;
2767	}
2768	return __pskb_trim(skb, len);
2769	}
2770	EXPORT_SYMBOL(pskb_trim_rcsum_slow);
2771
2772	/**
2773	* __pskb_pull_tail - advance tail of skb header
2774	* @skb: buffer to reallocate
2775	* @delta: number of bytes to advance tail
2776	*
2777	* The function makes a sense only on a fragmented &sk_buff,
2778	* it expands header moving its tail forward and copying necessary
2779	* data from fragmented part.
2780	*
2781	* &sk_buff MUST have reference count of 1.
2782	*
2783	* Returns %NULL (and &sk_buff does not change) if pull failed
2784	* or value of new tail of skb in the case of success.
2785	*
2786	* All the pointers pointing into skb header may change and must be
2787	* reloaded after call to this function.
2788	*/
2789
2790	/ Moves tail of skb head forward, copying data from fragmented part,*
2791	* when it is necessary.
2792	* 1. It may fail due to malloc failure.
2793	* 2. It may change skb pointers.
2794	*
2795	* It is pretty complicated. Luckily, it is called only in exceptional cases.
2796	*/
2797	void __pskb_pull_tail(struct* sk_buff skb, int* delta)
2798	{
2799	/ If skb has not enough free space at tail, get new one*
2800	* plus 128 bytes for future expansions. If we have enough
2801	* room at tail, reallocate without expansion only if skb is cloned.
2802	*/
2803	int i, k, eat = (skb->tail + delta) - skb->end;
2804
2805	if (!skb_frags_readable(skb))
2806	return NULL;
2807
2808	if (eat > `0` \|\| skb_cloned(skb)) {
2809	if (pskb_expand_head(skb, `0`, eat > `0` ? eat + `128` : `0`,
2810	GFP_ATOMIC))
2811	return NULL;
2812	}
2813
2814	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
2815	skb_tail_pointer(skb), delta));
2816
2817	/ Optimization: no fragments, no reasons to preestimate*
2818	* size of pulled pages. Superb.
2819	*/
2820	if (!skb_has_frag_list(skb))
2821	goto pull_pages;
2822
2823	/ Estimate size of pulled pages. /
2824	eat = delta;
2825	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2826	int size = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
2827
2828	if (size >= eat)
2829	goto pull_pages;
2830	eat -= size;
2831	}
2832
2833	/ If we need update frag list, we are in troubles.*
2834	* Certainly, it is possible to add an offset to skb data,
2835	* but taking into account that pulling is expected to
2836	* be very rare operation, it is worth to fight against
2837	* further bloating skb head and crucify ourselves here instead.
2838	* Pure masohism, indeed. 8)8)
2839	*/
2840	if (eat) {
2841	struct sk_buff *list = skb_shinfo(skb)->frag_list;
2842	struct sk_buff *clone = NULL;
2843	struct sk_buff *insp = NULL;
2844
2845	do {
2846	if (list->len <= eat) {
2847	/ Eaten as whole. /
2848	eat -= list->len;
2849	list = list->next;
2850	insp = list;
2851	} else {
2852	/ Eaten partially. /
2853	if (skb_is_gso(skb) && !list->head_frag &&
2854	skb_headlen(skb: list))
2855	skb_shinfo(skb)->gso_type \|= SKB_GSO_DODGY;
2856
2857	if (skb_shared(skb: list)) {
2858	/ Sucks! We need to fork list. :-( /
2859	clone = skb_clone(list, GFP_ATOMIC);
2860	if (!clone)
2861	return NULL;
2862	insp = list->next;
2863	list = clone;
2864	} else {
2865	/ This may be pulled without*
2866	* problems. */
2867	insp = list;
2868	}
2869	if (!pskb_pull(skb: list, len: eat)) {
2870	kfree_skb(skb: clone);
2871	return NULL;
2872	}
2873	break;
2874	}
2875	} while (eat);
2876
2877	/ Free pulled out fragments. /
2878	while ((list = skb_shinfo(skb)->frag_list) != insp) {
2879	skb_shinfo(skb)->frag_list = list->next;
2880	consume_skb(list);
2881	}
2882	/ And insert new clone at head. /
2883	if (clone) {
2884	clone->next = list;
2885	skb_shinfo(skb)->frag_list = clone;
2886	}
2887	}
2888	/ Success! Now we may commit changes to skb data. /
2889
2890	pull_pages:
2891	eat = delta;
2892	k = `0`;
2893	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2894	int size = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
2895
2896	if (size <= eat) {
2897	skb_frag_unref(skb, f: i);
2898	eat -= size;
2899	} else {
2900	skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
2901
2902	*frag = skb_shinfo(skb)->frags[i];
2903	if (eat) {
2904	skb_frag_off_add(frag, delta: eat);
2905	skb_frag_size_sub(frag, delta: eat);
2906	if (!i)
2907	goto end;
2908	eat = `0`;
2909	}
2910	k++;
2911	}
2912	}
2913	skb_shinfo(skb)->nr_frags = k;
2914
2915	end:
2916	skb->tail += delta;
2917	skb->data_len -= delta;
2918
2919	if (!skb->data_len)
2920	skb_zcopy_clear(skb, zerocopy_success: false);
2921
2922	return skb_tail_pointer(skb);
2923	}
2924	EXPORT_SYMBOL(__pskb_pull_tail);
2925
2926	/**
2927	* skb_copy_bits - copy bits from skb to kernel buffer
2928	* @skb: source skb
2929	* @offset: offset in source
2930	* @to: destination buffer
2931	* @len: number of bytes to copy
2932	*
2933	* Copy the specified number of bytes from the source skb to the
2934	* destination buffer.
2935	*
2936	* CAUTION ! :
2937	* If its prototype is ever changed,
2938	* check arch/{}/net/{}.S files,
2939	* since it is called from BPF assembly code.
2940	*/
2941	int skb_copy_bits(const struct sk_buff skb, int* offset, void to, int* len)
2942	{
2943	int start = skb_headlen(skb);
2944	struct sk_buff *frag_iter;
2945	int i, copy;
2946
2947	if (offset > (int)skb->len - len)
2948	goto fault;
2949
2950	/ Copy header. /
2951	if ((copy = start - offset) > `0`) {
2952	if (copy > len)
2953	copy = len;
2954	skb_copy_from_linear_data_offset(skb, offset, to, len: copy);
2955	if ((len -= copy) == `0`)
2956	return `0`;
2957	offset += copy;
2958	to += copy;
2959	}
2960
2961	if (!skb_frags_readable(skb))
2962	goto fault;
2963
2964	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2965	int end;
2966	skb_frag_t *f = &skb_shinfo(skb)->frags[i];
2967
2968	WARN_ON(start > offset + len);
2969
2970	end = start + skb_frag_size(frag: f);
2971	if ((copy = end - offset) > `0`) {
2972	u32 p_off, p_len, copied;
2973	struct page *p;
2974	u8 *vaddr;
2975
2976	if (copy > len)
2977	copy = len;
2978
2979	skb_frag_foreach_page(f,
2980	skb_frag_off(f) + offset - start,
2981	copy, p, p_off, p_len, copied) {
2982	vaddr = kmap_atomic(page: p);
2983	memcpy(to + copied, vaddr + p_off, p_len);
2984	kunmap_atomic(vaddr);
2985	}
2986
2987	if ((len -= copy) == `0`)
2988	return `0`;
2989	offset += copy;
2990	to += copy;
2991	}
2992	start = end;
2993	}
2994
2995	skb_walk_frags(skb, frag_iter) {
2996	int end;
2997
2998	WARN_ON(start > offset + len);
2999
3000	end = start + frag_iter->len;
3001	if ((copy = end - offset) > `0`) {
3002	if (copy > len)
3003	copy = len;
3004	if (skb_copy_bits(skb: frag_iter, offset: offset - start, to, len: copy))
3005	goto fault;
3006	if ((len -= copy) == `0`)
3007	return `0`;
3008	offset += copy;
3009	to += copy;
3010	}
3011	start = end;
3012	}
3013
3014	if (!len)
3015	return `0`;
3016
3017	fault:
3018	return -EFAULT;
3019	}
3020	EXPORT_SYMBOL(skb_copy_bits);
3021
3022	/*
3023	* Callback from splice_to_pipe(), if we need to release some pages
3024	* at the end of the spd in case we error'ed out in filling the pipe.
3025	*/
3026	static void sock_spd_release(struct splice_pipe_desc spd, unsigned* int i)
3027	{
3028	put_page(page: spd->pages[i]);
3029	}
3030
3031	static struct page linear_to_page(struct* page page, unsigned* int *len,
3032	unsigned int *offset,
3033	struct sock *sk)
3034	{
3035	struct page_frag *pfrag = sk_page_frag(sk);
3036
3037	if (!sk_page_frag_refill(sk, pfrag))
3038	return NULL;
3039
3040	len = min_t(unsigned* int, *len, pfrag->size - pfrag->offset);
3041
3042	memcpy(page_address(pfrag->page) + pfrag->offset,
3043	page_address(page) + offset, len);
3044	*offset = pfrag->offset;
3045	pfrag->offset += *len;
3046
3047	return pfrag->page;
3048	}
3049
3050	static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
3051	struct page *page,
3052	unsigned int offset)
3053	{
3054	return spd->nr_pages &&
3055	spd->pages[spd->nr_pages - `1`] == page &&
3056	(spd->partial[spd->nr_pages - `1`].offset +
3057	spd->partial[spd->nr_pages - `1`].len == offset);
3058	}
3059
3060	/*
3061	* Fill page/offset/length into spd, if it can hold more pages.
3062	*/
3063	static bool spd_fill_page(struct splice_pipe_desc *spd,
3064	struct pipe_inode_info pipe, struct* page *page,
3065	unsigned int len, unsigned* int offset,
3066	bool linear,
3067	struct sock *sk)
3068	{
3069	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
3070	return true;
3071
3072	if (linear) {
3073	page = linear_to_page(page, len, offset: &offset, sk);
3074	if (!page)
3075	return true;
3076	}
3077	if (spd_can_coalesce(spd, page, offset)) {
3078	spd->partial[spd->nr_pages - `1`].len += *len;
3079	return false;
3080	}
3081	get_page(page);
3082	spd->pages[spd->nr_pages] = page;
3083	spd->partial[spd->nr_pages].len = *len;
3084	spd->partial[spd->nr_pages].offset = offset;
3085	spd->nr_pages++;
3086
3087	return false;
3088	}
3089
3090	static bool __splice_segment(struct page page, unsigned* int poff,
3091	unsigned int plen, unsigned int *off,
3092	unsigned int *len,
3093	struct splice_pipe_desc *spd, bool linear,
3094	struct sock *sk,
3095	struct pipe_inode_info *pipe)
3096	{
3097	if (!*len)
3098	return true;
3099
3100	/ skip this segment if already processed /
3101	if (*off >= plen) {
3102	*off -= plen;
3103	return false;
3104	}
3105
3106	/ ignore any bits we already processed /
3107	poff += *off;
3108	plen -= *off;
3109	*off = `0`;
3110
3111	do {
3112	unsigned int flen = min(*len, plen);
3113
3114	if (spd_fill_page(spd, pipe, page, len: &flen, offset: poff,
3115	linear, sk))
3116	return true;
3117	poff += flen;
3118	plen -= flen;
3119	*len -= flen;
3120	} while (*len && plen);
3121
3122	return false;
3123	}
3124
3125	/*
3126	* Map linear and fragment data from the skb to spd. It reports true if the
3127	* pipe is full or if we already spliced the requested length.
3128	*/
3129	static bool __skb_splice_bits(struct sk_buff skb, struct* pipe_inode_info *pipe,
3130	unsigned int offset, unsigned* int *len,
3131	struct splice_pipe_desc spd, struct* sock *sk)
3132	{
3133	int seg;
3134	struct sk_buff *iter;
3135
3136	/ map the linear part :*
3137	* If skb->head_frag is set, this 'linear' part is backed by a
3138	* fragment, and if the head is not shared with any clones then
3139	* we can avoid a copy since we own the head portion of this page.
3140	*/
3141	if (__splice_segment(virt_to_page(skb->data),
3142	poff: (unsigned long) skb->data & (PAGE_SIZE - `1`),
3143	plen: skb_headlen(skb),
3144	off: offset, len, spd,
3145	linear: skb_head_is_locked(skb),
3146	sk, pipe))
3147	return true;
3148
3149	/*
3150	* then map the fragments
3151	*/
3152	if (!skb_frags_readable(skb))
3153	return false;
3154
3155	for (seg = `0`; seg < skb_shinfo(skb)->nr_frags; seg++) {
3156	const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
3157
3158	if (WARN_ON_ONCE(!skb_frag_page(f)))
3159	return false;
3160
3161	if (__splice_segment(page: skb_frag_page(frag: f),
3162	poff: skb_frag_off(frag: f), plen: skb_frag_size(frag: f),
3163	off: offset, len, spd, linear: false, sk, pipe))
3164	return true;
3165	}
3166
3167	skb_walk_frags(skb, iter) {
3168	if (*offset >= iter->len) {
3169	*offset -= iter->len;
3170	continue;
3171	}
3172	/ __skb_splice_bits() only fails if the output has no room*
3173	* left, so no point in going over the frag_list for the error
3174	* case.
3175	*/
3176	if (__skb_splice_bits(skb: iter, pipe, offset, len, spd, sk))
3177	return true;
3178	}
3179
3180	return false;
3181	}
3182
3183	/*
3184	* Map data from the skb to a pipe. Should handle both the linear part,
3185	* the fragments, and the frag list.
3186	*/
3187	int skb_splice_bits(struct sk_buff skb, struct* sock sk, unsigned* int offset,
3188	struct pipe_inode_info pipe, unsigned* int tlen,
3189	unsigned int flags)
3190	{
3191	struct partial_page partial[MAX_SKB_FRAGS];
3192	struct page *pages[MAX_SKB_FRAGS];
3193	struct splice_pipe_desc spd = {
3194	.pages = pages,
3195	.partial = partial,
3196	.nr_pages_max = MAX_SKB_FRAGS,
3197	.ops = &nosteal_pipe_buf_ops,
3198	.spd_release = sock_spd_release,
3199	};
3200	int ret = `0`;
3201
3202	__skb_splice_bits(skb, pipe, offset: &offset, len: &tlen, spd: &spd, sk);
3203
3204	if (spd.nr_pages)
3205	ret = splice_to_pipe(pipe, spd: &spd);
3206
3207	return ret;
3208	}
3209	EXPORT_SYMBOL_GPL(skb_splice_bits);
3210
3211	static int sendmsg_locked(struct sock sk, struct* msghdr *msg)
3212	{
3213	struct socket *sock = sk->sk_socket;
3214	size_t size = msg_data_left(msg);
3215
3216	if (!sock)
3217	return -EINVAL;
3218
3219	if (!sock->ops->sendmsg_locked)
3220	return sock_no_sendmsg_locked(sk, msg, len: size);
3221
3222	return sock->ops->sendmsg_locked(sk, msg, size);
3223	}
3224
3225	static int sendmsg_unlocked(struct sock sk, struct* msghdr *msg)
3226	{
3227	struct socket *sock = sk->sk_socket;
3228
3229	if (!sock)
3230	return -EINVAL;
3231	return sock_sendmsg(sock, msg);
3232	}
3233
3234	typedef int (sendmsg_func)(struct* sock sk, struct* msghdr *msg);
3235	static int __skb_send_sock(struct sock sk, struct* sk_buff skb, int* offset,
3236	int len, sendmsg_func sendmsg, int flags)
3237	{
3238	unsigned int orig_len = len;
3239	struct sk_buff *head = skb;
3240	unsigned short fragidx;
3241	int slen, ret;
3242
3243	do_frag_list:
3244
3245	/ Deal with head data /
3246	while (offset < skb_headlen(skb) && len) {
3247	struct kvec kv;
3248	struct msghdr msg;
3249
3250	slen = min_t(int, len, skb_headlen(skb) - offset);
3251	kv.iov_base = skb->data + offset;
3252	kv.iov_len = slen;
3253	memset(&msg, `0`, sizeof(msg));
3254	msg.msg_flags = MSG_DONTWAIT \| flags;
3255
3256	iov_iter_kvec(i: &msg.msg_iter, ITER_SOURCE, kvec: &kv, nr_segs: `1`, count: slen);
3257	ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
3258	sendmsg_unlocked, sk, &msg);
3259	if (ret <= `0`)
3260	goto error;
3261
3262	offset += ret;
3263	len -= ret;
3264	}
3265
3266	/ All the data was skb head? /
3267	if (!len)
3268	goto out;
3269
3270	/ Make offset relative to start of frags /
3271	offset -= skb_headlen(skb);
3272
3273	/ Find where we are in frag list /
3274	for (fragidx = `0`; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
3275	skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
3276
3277	if (offset < skb_frag_size(frag))
3278	break;
3279
3280	offset -= skb_frag_size(frag);
3281	}
3282
3283	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
3284	skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
3285
3286	slen = min_t(size_t, len, skb_frag_size(frag) - offset);
3287
3288	while (slen) {
3289	struct bio_vec bvec;
3290	struct msghdr msg = {
3291	.msg_flags = MSG_SPLICE_PAGES \| MSG_DONTWAIT \|
3292	flags,
3293	};
3294
3295	bvec_set_page(bv: &bvec, page: skb_frag_page(frag), len: slen,
3296	offset: skb_frag_off(frag) + offset);
3297	iov_iter_bvec(i: &msg.msg_iter, ITER_SOURCE, bvec: &bvec, nr_segs: `1`,
3298	count: slen);
3299
3300	ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
3301	sendmsg_unlocked, sk, &msg);
3302	if (ret <= `0`)
3303	goto error;
3304
3305	len -= ret;
3306	offset += ret;
3307	slen -= ret;
3308	}
3309
3310	offset = `0`;
3311	}
3312
3313	if (len) {
3314	/ Process any frag lists /
3315
3316	if (skb == head) {
3317	if (skb_has_frag_list(skb)) {
3318	skb = skb_shinfo(skb)->frag_list;
3319	goto do_frag_list;
3320	}
3321	} else if (skb->next) {
3322	skb = skb->next;
3323	goto do_frag_list;
3324	}
3325	}
3326
3327	out:
3328	return orig_len - len;
3329
3330	error:
3331	return orig_len == len ? ret : orig_len - len;
3332	}
3333
3334	/ Send skb data on a socket. Socket must be locked. /
3335	int skb_send_sock_locked(struct sock sk, struct* sk_buff skb, int* offset,
3336	int len)
3337	{
3338	return __skb_send_sock(sk, skb, offset, len, sendmsg: sendmsg_locked, flags: `0`);
3339	}
3340	EXPORT_SYMBOL_GPL(skb_send_sock_locked);
3341
3342	int skb_send_sock_locked_with_flags(struct sock sk, struct* sk_buff *skb,
3343	int offset, int len, int flags)
3344	{
3345	return __skb_send_sock(sk, skb, offset, len, sendmsg: sendmsg_locked, flags);
3346	}
3347	EXPORT_SYMBOL_GPL(skb_send_sock_locked_with_flags);
3348
3349	/ Send skb data on a socket. Socket must be unlocked. /
3350	int skb_send_sock(struct sock sk, struct* sk_buff skb, int* offset, int len)
3351	{
3352	return __skb_send_sock(sk, skb, offset, len, sendmsg: sendmsg_unlocked, flags: `0`);
3353	}
3354
3355	/**
3356	* skb_store_bits - store bits from kernel buffer to skb
3357	* @skb: destination buffer
3358	* @offset: offset in destination
3359	* @from: source buffer
3360	* @len: number of bytes to copy
3361	*
3362	* Copy the specified number of bytes from the source buffer to the
3363	* destination skb. This function handles all the messy bits of
3364	* traversing fragment lists and such.
3365	*/
3366
3367	int skb_store_bits(struct sk_buff skb, int* offset, const void from, int* len)
3368	{
3369	int start = skb_headlen(skb);
3370	struct sk_buff *frag_iter;
3371	int i, copy;
3372
3373	if (offset > (int)skb->len - len)
3374	goto fault;
3375
3376	if ((copy = start - offset) > `0`) {
3377	if (copy > len)
3378	copy = len;
3379	skb_copy_to_linear_data_offset(skb, offset, from, len: copy);
3380	if ((len -= copy) == `0`)
3381	return `0`;
3382	offset += copy;
3383	from += copy;
3384	}
3385
3386	if (!skb_frags_readable(skb))
3387	goto fault;
3388
3389	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3390	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3391	int end;
3392
3393	WARN_ON(start > offset + len);
3394
3395	end = start + skb_frag_size(frag);
3396	if ((copy = end - offset) > `0`) {
3397	u32 p_off, p_len, copied;
3398	struct page *p;
3399	u8 *vaddr;
3400
3401	if (copy > len)
3402	copy = len;
3403
3404	skb_frag_foreach_page(frag,
3405	skb_frag_off(frag) + offset - start,
3406	copy, p, p_off, p_len, copied) {
3407	vaddr = kmap_atomic(page: p);
3408	memcpy(vaddr + p_off, from + copied, p_len);
3409	kunmap_atomic(vaddr);
3410	}
3411
3412	if ((len -= copy) == `0`)
3413	return `0`;
3414	offset += copy;
3415	from += copy;
3416	}
3417	start = end;
3418	}
3419
3420	skb_walk_frags(skb, frag_iter) {
3421	int end;
3422
3423	WARN_ON(start > offset + len);
3424
3425	end = start + frag_iter->len;
3426	if ((copy = end - offset) > `0`) {
3427	if (copy > len)
3428	copy = len;
3429	if (skb_store_bits(skb: frag_iter, offset: offset - start,
3430	from, len: copy))
3431	goto fault;
3432	if ((len -= copy) == `0`)
3433	return `0`;
3434	offset += copy;
3435	from += copy;
3436	}
3437	start = end;
3438	}
3439	if (!len)
3440	return `0`;
3441
3442	fault:
3443	return -EFAULT;
3444	}
3445	EXPORT_SYMBOL(skb_store_bits);
3446
3447	/ Checksum skb data. /
3448	__wsum skb_checksum(const struct sk_buff skb, int* offset, int len, __wsum csum)
3449	{
3450	int start = skb_headlen(skb);
3451	int i, copy = start - offset;
3452	struct sk_buff *frag_iter;
3453	int pos = `0`;
3454
3455	/ Checksum header. /
3456	if (copy > `0`) {
3457	if (copy > len)
3458	copy = len;
3459	csum = csum_partial(buff: skb->data + offset, len: copy, sum: csum);
3460	if ((len -= copy) == `0`)
3461	return csum;
3462	offset += copy;
3463	pos = copy;
3464	}
3465
3466	if (WARN_ON_ONCE(!skb_frags_readable(skb)))
3467	return `0`;
3468
3469	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3470	int end;
3471	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3472
3473	WARN_ON(start > offset + len);
3474
3475	end = start + skb_frag_size(frag);
3476	if ((copy = end - offset) > `0`) {
3477	u32 p_off, p_len, copied;
3478	struct page *p;
3479	__wsum csum2;
3480	u8 *vaddr;
3481
3482	if (copy > len)
3483	copy = len;
3484
3485	skb_frag_foreach_page(frag,
3486	skb_frag_off(frag) + offset - start,
3487	copy, p, p_off, p_len, copied) {
3488	vaddr = kmap_atomic(page: p);
3489	csum2 = csum_partial(buff: vaddr + p_off, len: p_len, sum: `0`);
3490	kunmap_atomic(vaddr);
3491	csum = csum_block_add(csum, csum2, offset: pos);
3492	pos += p_len;
3493	}
3494
3495	if (!(len -= copy))
3496	return csum;
3497	offset += copy;
3498	}
3499	start = end;
3500	}
3501
3502	skb_walk_frags(skb, frag_iter) {
3503	int end;
3504
3505	WARN_ON(start > offset + len);
3506
3507	end = start + frag_iter->len;
3508	if ((copy = end - offset) > `0`) {
3509	__wsum csum2;
3510	if (copy > len)
3511	copy = len;
3512	csum2 = skb_checksum(skb: frag_iter, offset: offset - start, len: copy,
3513	csum: `0`);
3514	csum = csum_block_add(csum, csum2, offset: pos);
3515	if ((len -= copy) == `0`)
3516	return csum;
3517	offset += copy;
3518	pos += copy;
3519	}
3520	start = end;
3521	}
3522	BUG_ON(len);
3523
3524	return csum;
3525	}
3526	EXPORT_SYMBOL(skb_checksum);
3527
3528	/ Both of above in one bottle. /
3529
3530	__wsum skb_copy_and_csum_bits(const struct sk_buff skb, int* offset,
3531	u8 to, int* len)
3532	{
3533	int start = skb_headlen(skb);
3534	int i, copy = start - offset;
3535	struct sk_buff *frag_iter;
3536	int pos = `0`;
3537	__wsum csum = `0`;
3538
3539	/ Copy header. /
3540	if (copy > `0`) {
3541	if (copy > len)
3542	copy = len;
3543	csum = csum_partial_copy_nocheck(src: skb->data + offset, dst: to,
3544	len: copy);
3545	if ((len -= copy) == `0`)
3546	return csum;
3547	offset += copy;
3548	to += copy;
3549	pos = copy;
3550	}
3551
3552	if (!skb_frags_readable(skb))
3553	return `0`;
3554
3555	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3556	int end;
3557
3558	WARN_ON(start > offset + len);
3559
3560	end = start + skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
3561	if ((copy = end - offset) > `0`) {
3562	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3563	u32 p_off, p_len, copied;
3564	struct page *p;
3565	__wsum csum2;
3566	u8 *vaddr;
3567
3568	if (copy > len)
3569	copy = len;
3570
3571	skb_frag_foreach_page(frag,
3572	skb_frag_off(frag) + offset - start,
3573	copy, p, p_off, p_len, copied) {
3574	vaddr = kmap_atomic(page: p);
3575	csum2 = csum_partial_copy_nocheck(src: vaddr + p_off,
3576	dst: to + copied,
3577	len: p_len);
3578	kunmap_atomic(vaddr);
3579	csum = csum_block_add(csum, csum2, offset: pos);
3580	pos += p_len;
3581	}
3582
3583	if (!(len -= copy))
3584	return csum;
3585	offset += copy;
3586	to += copy;
3587	}
3588	start = end;
3589	}
3590
3591	skb_walk_frags(skb, frag_iter) {
3592	__wsum csum2;
3593	int end;
3594
3595	WARN_ON(start > offset + len);
3596
3597	end = start + frag_iter->len;
3598	if ((copy = end - offset) > `0`) {
3599	if (copy > len)
3600	copy = len;
3601	csum2 = skb_copy_and_csum_bits(skb: frag_iter,
3602	offset: offset - start,
3603	to, len: copy);
3604	csum = csum_block_add(csum, csum2, offset: pos);
3605	if ((len -= copy) == `0`)
3606	return csum;
3607	offset += copy;
3608	to += copy;
3609	pos += copy;
3610	}
3611	start = end;
3612	}
3613	BUG_ON(len);
3614	return csum;
3615	}
3616	EXPORT_SYMBOL(skb_copy_and_csum_bits);
3617
3618	#ifdef CONFIG_NET_CRC32C
3619	u32 skb_crc32c(const struct sk_buff skb, int* offset, int len, u32 crc)
3620	{
3621	int start = skb_headlen(skb);
3622	int i, copy = start - offset;
3623	struct sk_buff *frag_iter;
3624
3625	if (copy > `0`) {
3626	copy = min(copy, len);
3627	crc = crc32c(crc, p: skb->data + offset, len: copy);
3628	len -= copy;
3629	if (len == `0`)
3630	return crc;
3631	offset += copy;
3632	}
3633
3634	if (WARN_ON_ONCE(!skb_frags_readable(skb)))
3635	return `0`;
3636
3637	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3638	int end;
3639	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3640
3641	WARN_ON(start > offset + len);
3642
3643	end = start + skb_frag_size(frag);
3644	copy = end - offset;
3645	if (copy > `0`) {
3646	u32 p_off, p_len, copied;
3647	struct page *p;
3648	u8 *vaddr;
3649
3650	copy = min(copy, len);
3651	skb_frag_foreach_page(frag,
3652	skb_frag_off(frag) + offset - start,
3653	copy, p, p_off, p_len, copied) {
3654	vaddr = kmap_atomic(page: p);
3655	crc = crc32c(crc, p: vaddr + p_off, len: p_len);
3656	kunmap_atomic(vaddr);
3657	}
3658	len -= copy;
3659	if (len == `0`)
3660	return crc;
3661	offset += copy;
3662	}
3663	start = end;
3664	}
3665
3666	skb_walk_frags(skb, frag_iter) {
3667	int end;
3668
3669	WARN_ON(start > offset + len);
3670
3671	end = start + frag_iter->len;
3672	copy = end - offset;
3673	if (copy > `0`) {
3674	copy = min(copy, len);
3675	crc = skb_crc32c(skb: frag_iter, offset: offset - start, len: copy, crc);
3676	len -= copy;
3677	if (len == `0`)
3678	return crc;
3679	offset += copy;
3680	}
3681	start = end;
3682	}
3683	BUG_ON(len);
3684
3685	return crc;
3686	}
3687	EXPORT_SYMBOL(skb_crc32c);
3688	#endif /* CONFIG_NET_CRC32C */
3689
3690	__sum16 __skb_checksum_complete_head(struct sk_buff skb, int* len)
3691	{
3692	__sum16 sum;
3693
3694	sum = csum_fold(csum: skb_checksum(skb, `0`, len, skb->csum));
3695	/ See comments in __skb_checksum_complete(). /
3696	if (likely(!sum)) {
3697	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
3698	!skb->csum_complete_sw)
3699	netdev_rx_csum_fault(dev: skb->dev, skb);
3700	}
3701	if (!skb_shared(skb))
3702	skb->csum_valid = !sum;
3703	return sum;
3704	}
3705	EXPORT_SYMBOL(__skb_checksum_complete_head);
3706
3707	/ This function assumes skb->csum already holds pseudo header's checksum,*
3708	* which has been changed from the hardware checksum, for example, by
3709	* __skb_checksum_validate_complete(). And, the original skb->csum must
3710	* have been validated unsuccessfully for CHECKSUM_COMPLETE case.
3711	*
3712	* It returns non-zero if the recomputed checksum is still invalid, otherwise
3713	* zero. The new checksum is stored back into skb->csum unless the skb is
3714	* shared.
3715	*/
3716	__sum16 __skb_checksum_complete(struct sk_buff *skb)
3717	{
3718	__wsum csum;
3719	__sum16 sum;
3720
3721	csum = skb_checksum(skb, `0`, skb->len, `0`);
3722
3723	sum = csum_fold(csum: csum_add(csum: skb->csum, addend: csum));
3724	/ This check is inverted, because we already knew the hardware*
3725	* checksum is invalid before calling this function. So, if the
3726	* re-computed checksum is valid instead, then we have a mismatch
3727	* between the original skb->csum and skb_checksum(). This means either
3728	* the original hardware checksum is incorrect or we screw up skb->csum
3729	* when moving skb->data around.
3730	*/
3731	if (likely(!sum)) {
3732	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
3733	!skb->csum_complete_sw)
3734	netdev_rx_csum_fault(dev: skb->dev, skb);
3735	}
3736
3737	if (!skb_shared(skb)) {
3738	/ Save full packet checksum /
3739	skb->csum = csum;
3740	skb->ip_summed = CHECKSUM_COMPLETE;
3741	skb->csum_complete_sw = `1`;
3742	skb->csum_valid = !sum;
3743	}
3744
3745	return sum;
3746	}
3747	EXPORT_SYMBOL(__skb_checksum_complete);
3748
3749	/**
3750	* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
3751	* @from: source buffer
3752	*
3753	* Calculates the amount of linear headroom needed in the 'to' skb passed
3754	* into skb_zerocopy().
3755	*/
3756	unsigned int
3757	skb_zerocopy_headlen(const struct sk_buff *from)
3758	{
3759	unsigned int hlen = `0`;
3760
3761	if (!from->head_frag \|\|
3762	skb_headlen(skb: from) < L1_CACHE_BYTES \|\|
3763	skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
3764	hlen = skb_headlen(skb: from);
3765	if (!hlen)
3766	hlen = from->len;
3767	}
3768
3769	if (skb_has_frag_list(skb: from))
3770	hlen = from->len;
3771
3772	return hlen;
3773	}
3774	EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
3775
3776	/**
3777	* skb_zerocopy - Zero copy skb to skb
3778	* @to: destination buffer
3779	* @from: source buffer
3780	* @len: number of bytes to copy from source buffer
3781	* @hlen: size of linear headroom in destination buffer
3782	*
3783	* Copies up to `len` bytes from `from` to `to` by creating references
3784	* to the frags in the source buffer.
3785	*
3786	* The `hlen` as calculated by skb_zerocopy_headlen() specifies the
3787	* headroom in the `to` buffer.
3788	*
3789	* Return value:
3790	* 0: everything is OK
3791	* -ENOMEM: couldn't orphan frags of @from due to lack of memory
3792	* -EFAULT: skb_copy_bits() found some problem with skb geometry
3793	*/
3794	int
3795	skb_zerocopy(struct sk_buff to, struct* sk_buff from, int* len, int hlen)
3796	{
3797	int i, j = `0`;
3798	int plen = `0`; / length of skb->head fragment /
3799	int ret;
3800	struct page *page;
3801	unsigned int offset;
3802
3803	BUG_ON(!from->head_frag && !hlen);
3804
3805	/ dont bother with small payloads /
3806	if (len <= skb_tailroom(skb: to))
3807	return skb_copy_bits(from, `0`, skb_put(to, len), len);
3808
3809	if (hlen) {
3810	ret = skb_copy_bits(from, `0`, skb_put(to, hlen), hlen);
3811	if (unlikely(ret))
3812	return ret;
3813	len -= hlen;
3814	} else {
3815	plen = min_t(int, skb_headlen(from), len);
3816	if (plen) {
3817	page = virt_to_head_page(x: from->head);
3818	offset = from->data - (unsigned char *)page_address(page);
3819	__skb_fill_netmem_desc(skb: to, i: `0`, netmem: page_to_netmem(page),
3820	off: offset, size: plen);
3821	get_page(page);
3822	j = `1`;
3823	len -= plen;
3824	}
3825	}
3826
3827	skb_len_add(skb: to, delta: len + plen);
3828
3829	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
3830	skb_tx_error(from);
3831	return -ENOMEM;
3832	}
3833	skb_zerocopy_clone(nskb: to, orig: from, GFP_ATOMIC);
3834
3835	for (i = `0`; i < skb_shinfo(from)->nr_frags; i++) {
3836	int size;
3837
3838	if (!len)
3839	break;
3840	skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
3841	size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
3842	len);
3843	skb_frag_size_set(frag: &skb_shinfo(to)->frags[j], size);
3844	len -= size;
3845	skb_frag_ref(skb: to, f: j);
3846	j++;
3847	}
3848	skb_shinfo(to)->nr_frags = j;
3849
3850	return `0`;
3851	}
3852	EXPORT_SYMBOL_GPL(skb_zerocopy);
3853
3854	void skb_copy_and_csum_dev(const struct sk_buff skb, u8 to)
3855	{
3856	__wsum csum;
3857	long csstart;
3858
3859	if (skb->ip_summed == CHECKSUM_PARTIAL)
3860	csstart = skb_checksum_start_offset(skb);
3861	else
3862	csstart = skb_headlen(skb);
3863
3864	BUG_ON(csstart > skb_headlen(skb));
3865
3866	skb_copy_from_linear_data(skb, to, len: csstart);
3867
3868	csum = `0`;
3869	if (csstart != skb->len)
3870	csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
3871	skb->len - csstart);
3872
3873	if (skb->ip_summed == CHECKSUM_PARTIAL) {
3874	long csstuff = csstart + skb->csum_offset;
3875
3876	((__sum16 )(to + csstuff)) = csum_fold(csum);
3877	}
3878	}
3879	EXPORT_SYMBOL(skb_copy_and_csum_dev);
3880
3881	/**
3882	* skb_dequeue - remove from the head of the queue
3883	* @list: list to dequeue from
3884	*
3885	* Remove the head of the list. The list lock is taken so the function
3886	* may be used safely with other locking list functions. The head item is
3887	* returned or %NULL if the list is empty.
3888	*/
3889
3890	struct sk_buff skb_dequeue(struct* sk_buff_head *list)
3891	{
3892	unsigned long flags;
3893	struct sk_buff *result;
3894
3895	spin_lock_irqsave(&list->lock, flags);
3896	result = __skb_dequeue(list);
3897	spin_unlock_irqrestore(lock: &list->lock, flags);
3898	return result;
3899	}
3900	EXPORT_SYMBOL(skb_dequeue);
3901
3902	/**
3903	* skb_dequeue_tail - remove from the tail of the queue
3904	* @list: list to dequeue from
3905	*
3906	* Remove the tail of the list. The list lock is taken so the function
3907	* may be used safely with other locking list functions. The tail item is
3908	* returned or %NULL if the list is empty.
3909	*/
3910	struct sk_buff skb_dequeue_tail(struct* sk_buff_head *list)
3911	{
3912	unsigned long flags;
3913	struct sk_buff *result;
3914
3915	spin_lock_irqsave(&list->lock, flags);
3916	result = __skb_dequeue_tail(list);
3917	spin_unlock_irqrestore(lock: &list->lock, flags);
3918	return result;
3919	}
3920	EXPORT_SYMBOL(skb_dequeue_tail);
3921
3922	/**
3923	* skb_queue_purge_reason - empty a list
3924	* @list: list to empty
3925	* @reason: drop reason
3926	*
3927	* Delete all buffers on an &sk_buff list. Each buffer is removed from
3928	* the list and one reference dropped. This function takes the list
3929	* lock and is atomic with respect to other list locking functions.
3930	*/
3931	void skb_queue_purge_reason(struct sk_buff_head *list,
3932	enum skb_drop_reason reason)
3933	{
3934	struct sk_buff_head tmp;
3935	unsigned long flags;
3936
3937	if (skb_queue_empty_lockless(list))
3938	return;
3939
3940	__skb_queue_head_init(list: &tmp);
3941
3942	spin_lock_irqsave(&list->lock, flags);
3943	skb_queue_splice_init(list, head: &tmp);
3944	spin_unlock_irqrestore(lock: &list->lock, flags);
3945
3946	__skb_queue_purge_reason(list: &tmp, reason);
3947	}
3948	EXPORT_SYMBOL(skb_queue_purge_reason);
3949
3950	/**
3951	* skb_rbtree_purge - empty a skb rbtree
3952	* @root: root of the rbtree to empty
3953	* Return value: the sum of truesizes of all purged skbs.
3954	*
3955	* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
3956	* the list and one reference dropped. This function does not take
3957	* any lock. Synchronization should be handled by the caller (e.g., TCP
3958	* out-of-order queue is protected by the socket lock).
3959	*/
3960	unsigned int skb_rbtree_purge(struct rb_root *root)
3961	{
3962	struct rb_node *p = rb_first(root);
3963	unsigned int sum = `0`;
3964
3965	while (p) {
3966	struct sk_buff skb = rb_entry(p, struct* sk_buff, rbnode);
3967
3968	p = rb_next(p);
3969	rb_erase(&skb->rbnode, root);
3970	sum += skb->truesize;
3971	kfree_skb(skb);
3972	}
3973	return sum;
3974	}
3975
3976	void skb_errqueue_purge(struct sk_buff_head *list)
3977	{
3978	struct sk_buff skb, next;
3979	struct sk_buff_head kill;
3980	unsigned long flags;
3981
3982	__skb_queue_head_init(list: &kill);
3983
3984	spin_lock_irqsave(&list->lock, flags);
3985	skb_queue_walk_safe(list, skb, next) {
3986	if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY \|\|
3987	SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING)
3988	continue;
3989	__skb_unlink(skb, list);
3990	__skb_queue_tail(list: &kill, newsk: skb);
3991	}
3992	spin_unlock_irqrestore(lock: &list->lock, flags);
3993	__skb_queue_purge(list: &kill);
3994	}
3995	EXPORT_SYMBOL(skb_errqueue_purge);
3996
3997	/**
3998	* skb_queue_head - queue a buffer at the list head
3999	* @list: list to use
4000	* @newsk: buffer to queue
4001	*
4002	* Queue a buffer at the start of the list. This function takes the
4003	* list lock and can be used safely with other locking &sk_buff functions
4004	* safely.
4005	*
4006	* A buffer cannot be placed on two lists at the same time.
4007	*/
4008	void skb_queue_head(struct sk_buff_head list, struct* sk_buff *newsk)
4009	{
4010	unsigned long flags;
4011
4012	spin_lock_irqsave(&list->lock, flags);
4013	__skb_queue_head(list, newsk);
4014	spin_unlock_irqrestore(lock: &list->lock, flags);
4015	}
4016	EXPORT_SYMBOL(skb_queue_head);
4017
4018	/**
4019	* skb_queue_tail - queue a buffer at the list tail
4020	* @list: list to use
4021	* @newsk: buffer to queue
4022	*
4023	* Queue a buffer at the tail of the list. This function takes the
4024	* list lock and can be used safely with other locking &sk_buff functions
4025	* safely.
4026	*
4027	* A buffer cannot be placed on two lists at the same time.
4028	*/
4029	void skb_queue_tail(struct sk_buff_head list, struct* sk_buff *newsk)
4030	{
4031	unsigned long flags;
4032
4033	spin_lock_irqsave(&list->lock, flags);
4034	__skb_queue_tail(list, newsk);
4035	spin_unlock_irqrestore(lock: &list->lock, flags);
4036	}
4037	EXPORT_SYMBOL(skb_queue_tail);
4038
4039	/**
4040	* skb_unlink - remove a buffer from a list
4041	* @skb: buffer to remove
4042	* @list: list to use
4043	*
4044	* Remove a packet from a list. The list locks are taken and this
4045	* function is atomic with respect to other list locked calls
4046	*
4047	* You must know what list the SKB is on.
4048	*/
4049	void skb_unlink(struct sk_buff skb, struct* sk_buff_head *list)
4050	{
4051	unsigned long flags;
4052
4053	spin_lock_irqsave(&list->lock, flags);
4054	__skb_unlink(skb, list);
4055	spin_unlock_irqrestore(lock: &list->lock, flags);
4056	}
4057	EXPORT_SYMBOL(skb_unlink);
4058
4059	/**
4060	* skb_append - append a buffer
4061	* @old: buffer to insert after
4062	* @newsk: buffer to insert
4063	* @list: list to use
4064	*
4065	* Place a packet after a given packet in a list. The list locks are taken
4066	* and this function is atomic with respect to other list locked calls.
4067	* A buffer cannot be placed on two lists at the same time.
4068	*/
4069	void skb_append(struct sk_buff old, struct* sk_buff newsk, struct* sk_buff_head *list)
4070	{
4071	unsigned long flags;
4072
4073	spin_lock_irqsave(&list->lock, flags);
4074	__skb_queue_after(list, prev: old, newsk);
4075	spin_unlock_irqrestore(lock: &list->lock, flags);
4076	}
4077	EXPORT_SYMBOL(skb_append);
4078
4079	static inline void skb_split_inside_header(struct sk_buff *skb,
4080	struct sk_buff* skb1,
4081	const u32 len, const int pos)
4082	{
4083	int i;
4084
4085	skb_copy_from_linear_data_offset(skb, offset: len, to: skb_put(skb1, pos - len),
4086	len: pos - len);
4087	/ And move data appendix as is. /
4088	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
4089	skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
4090
4091	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
4092	skb1->unreadable = skb->unreadable;
4093	skb_shinfo(skb)->nr_frags = `0`;
4094	skb1->data_len = skb->data_len;
4095	skb1->len += skb1->data_len;
4096	skb->data_len = `0`;
4097	skb->len = len;
4098	skb_set_tail_pointer(skb, offset: len);
4099	}
4100
4101	static inline void skb_split_no_header(struct sk_buff *skb,
4102	struct sk_buff* skb1,
4103	const u32 len, int pos)
4104	{
4105	int i, k = `0`;
4106	const int nfrags = skb_shinfo(skb)->nr_frags;
4107
4108	skb_shinfo(skb)->nr_frags = `0`;
4109	skb1->len = skb1->data_len = skb->len - len;
4110	skb->len = len;
4111	skb->data_len = len - pos;
4112
4113	for (i = `0`; i < nfrags; i++) {
4114	int size = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
4115
4116	if (pos + size > len) {
4117	skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
4118
4119	if (pos < len) {
4120	/ Split frag.*
4121	* We have two variants in this case:
4122	* 1. Move all the frag to the second
4123	* part, if it is possible. F.e.
4124	* this approach is mandatory for TUX,
4125	* where splitting is expensive.
4126	* 2. Split is accurately. We make this.
4127	*/
4128	skb_frag_ref(skb, f: i);
4129	skb_frag_off_add(frag: &skb_shinfo(skb1)->frags[`0`], delta: len - pos);
4130	skb_frag_size_sub(frag: &skb_shinfo(skb1)->frags[`0`], delta: len - pos);
4131	skb_frag_size_set(frag: &skb_shinfo(skb)->frags[i], size: len - pos);
4132	skb_shinfo(skb)->nr_frags++;
4133	}
4134	k++;
4135	} else
4136	skb_shinfo(skb)->nr_frags++;
4137	pos += size;
4138	}
4139	skb_shinfo(skb1)->nr_frags = k;
4140
4141	skb1->unreadable = skb->unreadable;
4142	}
4143
4144	/**
4145	* skb_split - Split fragmented skb to two parts at length len.
4146	* @skb: the buffer to split
4147	* @skb1: the buffer to receive the second part
4148	* @len: new length for skb
4149	*/
4150	void skb_split(struct sk_buff skb, struct* sk_buff skb1, const* u32 len)
4151	{
4152	int pos = skb_headlen(skb);
4153	const int zc_flags = SKBFL_SHARED_FRAG \| SKBFL_PURE_ZEROCOPY;
4154
4155	skb_zcopy_downgrade_managed(skb);
4156
4157	skb_shinfo(skb1)->flags \|= skb_shinfo(skb)->flags & zc_flags;
4158	skb_zerocopy_clone(nskb: skb1, orig: skb, gfp_mask: `0`);
4159	if (len < pos) / Split line is inside header. /
4160	skb_split_inside_header(skb, skb1, len, pos);
4161	else / Second chunk has no header, nothing to copy. /
4162	skb_split_no_header(skb, skb1, len, pos);
4163	}
4164	EXPORT_SYMBOL(skb_split);
4165
4166	/ Shifting from/to a cloned skb is a no-go.*
4167	*
4168	* Caller cannot keep skb_shinfo related pointers past calling here!
4169	*/
4170	static int skb_prepare_for_shift(struct sk_buff *skb)
4171	{
4172	return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
4173	}
4174
4175	/**
4176	* skb_shift - Shifts paged data partially from skb to another
4177	* @tgt: buffer into which tail data gets added
4178	* @skb: buffer from which the paged data comes from
4179	* @shiftlen: shift up to this many bytes
4180	*
4181	* Attempts to shift up to shiftlen worth of bytes, which may be less than
4182	* the length of the skb, from skb to tgt. Returns number bytes shifted.
4183	* It's up to caller to free skb if everything was shifted.
4184	*
4185	* If @tgt runs out of frags, the whole operation is aborted.
4186	*
4187	* Skb cannot include anything else but paged data while tgt is allowed
4188	* to have non-paged data as well.
4189	*
4190	* TODO: full sized shift could be optimized but that would need
4191	* specialized skb free'er to handle frags without up-to-date nr_frags.
4192	*/
4193	int skb_shift(struct sk_buff tgt, struct* sk_buff skb, int* shiftlen)
4194	{
4195	int from, to, merge, todo;
4196	skb_frag_t fragfrom, fragto;
4197
4198	BUG_ON(shiftlen > skb->len);
4199
4200	if (skb_headlen(skb))
4201	return `0`;
4202	if (skb_zcopy(skb: tgt) \|\| skb_zcopy(skb))
4203	return `0`;
4204
4205	DEBUG_NET_WARN_ON_ONCE(tgt->pp_recycle != skb->pp_recycle);
4206	DEBUG_NET_WARN_ON_ONCE(skb_cmp_decrypted(tgt, skb));
4207
4208	todo = shiftlen;
4209	from = `0`;
4210	to = skb_shinfo(tgt)->nr_frags;
4211	fragfrom = &skb_shinfo(skb)->frags[from];
4212
4213	/ Actual merge is delayed until the point when we know we can*
4214	* commit all, so that we don't have to undo partial changes
4215	*/
4216	if (!skb_can_coalesce(skb: tgt, i: to, page: skb_frag_page(frag: fragfrom),
4217	off: skb_frag_off(frag: fragfrom))) {
4218	merge = -`1`;
4219	} else {
4220	merge = to - `1`;
4221
4222	todo -= skb_frag_size(frag: fragfrom);
4223	if (todo < `0`) {
4224	if (skb_prepare_for_shift(skb) \|\|
4225	skb_prepare_for_shift(skb: tgt))
4226	return `0`;
4227
4228	/ All previous frag pointers might be stale! /
4229	fragfrom = &skb_shinfo(skb)->frags[from];
4230	fragto = &skb_shinfo(tgt)->frags[merge];
4231
4232	skb_frag_size_add(frag: fragto, delta: shiftlen);
4233	skb_frag_size_sub(frag: fragfrom, delta: shiftlen);
4234	skb_frag_off_add(frag: fragfrom, delta: shiftlen);
4235
4236	goto onlymerged;
4237	}
4238
4239	from++;
4240	}
4241
4242	/ Skip full, not-fitting skb to avoid expensive operations /
4243	if ((shiftlen == skb->len) &&
4244	(skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
4245	return `0`;
4246
4247	if (skb_prepare_for_shift(skb) \|\| skb_prepare_for_shift(skb: tgt))
4248	return `0`;
4249
4250	while ((todo > `0`) && (from < skb_shinfo(skb)->nr_frags)) {
4251	if (to == MAX_SKB_FRAGS)
4252	return `0`;
4253
4254	fragfrom = &skb_shinfo(skb)->frags[from];
4255	fragto = &skb_shinfo(tgt)->frags[to];
4256
4257	if (todo >= skb_frag_size(frag: fragfrom)) {
4258	fragto = fragfrom;
4259	todo -= skb_frag_size(frag: fragfrom);
4260	from++;
4261	to++;
4262
4263	} else {
4264	__skb_frag_ref(frag: fragfrom);
4265	skb_frag_page_copy(fragto, fragfrom);
4266	skb_frag_off_copy(fragto, fragfrom);
4267	skb_frag_size_set(frag: fragto, size: todo);
4268
4269	skb_frag_off_add(frag: fragfrom, delta: todo);
4270	skb_frag_size_sub(frag: fragfrom, delta: todo);
4271	todo = `0`;
4272
4273	to++;
4274	break;
4275	}
4276	}
4277
4278	/ Ready to "commit" this state change to tgt /
4279	skb_shinfo(tgt)->nr_frags = to;
4280
4281	if (merge >= `0`) {
4282	fragfrom = &skb_shinfo(skb)->frags[`0`];
4283	fragto = &skb_shinfo(tgt)->frags[merge];
4284
4285	skb_frag_size_add(frag: fragto, delta: skb_frag_size(frag: fragfrom));
4286	__skb_frag_unref(frag: fragfrom, recycle: skb->pp_recycle);
4287	}
4288
4289	/ Reposition in the original skb /
4290	to = `0`;
4291	while (from < skb_shinfo(skb)->nr_frags)
4292	skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
4293	skb_shinfo(skb)->nr_frags = to;
4294
4295	BUG_ON(todo > `0` && !skb_shinfo(skb)->nr_frags);
4296
4297	onlymerged:
4298	/ Most likely the tgt won't ever need its checksum anymore, skb on*
4299	* the other hand might need it if it needs to be resent
4300	*/
4301	tgt->ip_summed = CHECKSUM_PARTIAL;
4302	skb->ip_summed = CHECKSUM_PARTIAL;
4303
4304	skb_len_add(skb, delta: -shiftlen);
4305	skb_len_add(skb: tgt, delta: shiftlen);
4306
4307	return shiftlen;
4308	}
4309
4310	/**
4311	* skb_prepare_seq_read - Prepare a sequential read of skb data
4312	* @skb: the buffer to read
4313	* @from: lower offset of data to be read
4314	* @to: upper offset of data to be read
4315	* @st: state variable
4316	*
4317	* Initializes the specified state variable. Must be called before
4318	* invoking skb_seq_read() for the first time.
4319	*/
4320	void skb_prepare_seq_read(struct sk_buff skb, unsigned* int from,
4321	unsigned int to, struct skb_seq_state *st)
4322	{
4323	st->lower_offset = from;
4324	st->upper_offset = to;
4325	st->root_skb = st->cur_skb = skb;
4326	st->frag_idx = st->stepped_offset = `0`;
4327	st->frag_data = NULL;
4328	st->frag_off = `0`;
4329	}
4330	EXPORT_SYMBOL(skb_prepare_seq_read);
4331
4332	/**
4333	* skb_seq_read - Sequentially read skb data
4334	* @consumed: number of bytes consumed by the caller so far
4335	* @data: destination pointer for data to be returned
4336	* @st: state variable
4337	*
4338	* Reads a block of skb data at @consumed relative to the
4339	* lower offset specified to skb_prepare_seq_read(). Assigns
4340	* the head of the data block to @data and returns the length
4341	* of the block or 0 if the end of the skb data or the upper
4342	* offset has been reached.
4343	*
4344	* The caller is not required to consume all of the data
4345	* returned, i.e. @consumed is typically set to the number
4346	* of bytes already consumed and the next call to
4347	* skb_seq_read() will return the remaining part of the block.
4348	*
4349	* Note 1: The size of each block of data returned can be arbitrary,
4350	* this limitation is the cost for zerocopy sequential
4351	* reads of potentially non linear data.
4352	*
4353	* Note 2: Fragment lists within fragments are not implemented
4354	* at the moment, state->root_skb could be replaced with
4355	* a stack for this purpose.
4356	*/
4357	unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
4358	struct skb_seq_state *st)
4359	{
4360	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
4361	skb_frag_t *frag;
4362
4363	if (unlikely(abs_offset >= st->upper_offset)) {
4364	if (st->frag_data) {
4365	kunmap_atomic(st->frag_data);
4366	st->frag_data = NULL;
4367	}
4368	return `0`;
4369	}
4370
4371	next_skb:
4372	block_limit = skb_headlen(skb: st->cur_skb) + st->stepped_offset;
4373
4374	if (abs_offset < block_limit && !st->frag_data) {
4375	*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
4376	return block_limit - abs_offset;
4377	}
4378
4379	if (!skb_frags_readable(skb: st->cur_skb))
4380	return `0`;
4381
4382	if (st->frag_idx == `0` && !st->frag_data)
4383	st->stepped_offset += skb_headlen(skb: st->cur_skb);
4384
4385	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
4386	unsigned int pg_idx, pg_off, pg_sz;
4387
4388	frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
4389
4390	pg_idx = `0`;
4391	pg_off = skb_frag_off(frag);
4392	pg_sz = skb_frag_size(frag);
4393
4394	if (skb_frag_must_loop(p: skb_frag_page(frag))) {
4395	pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT;
4396	pg_off = offset_in_page(pg_off + st->frag_off);
4397	pg_sz = min_t(unsigned int, pg_sz - st->frag_off,
4398	PAGE_SIZE - pg_off);
4399	}
4400
4401	block_limit = pg_sz + st->stepped_offset;
4402	if (abs_offset < block_limit) {
4403	if (!st->frag_data)
4404	st->frag_data = kmap_atomic(page: skb_frag_page(frag) + pg_idx);
4405
4406	data = (u8 )st->frag_data + pg_off +
4407	(abs_offset - st->stepped_offset);
4408
4409	return block_limit - abs_offset;
4410	}
4411
4412	if (st->frag_data) {
4413	kunmap_atomic(st->frag_data);
4414	st->frag_data = NULL;
4415	}
4416
4417	st->stepped_offset += pg_sz;
4418	st->frag_off += pg_sz;
4419	if (st->frag_off == skb_frag_size(frag)) {
4420	st->frag_off = `0`;
4421	st->frag_idx++;
4422	}
4423	}
4424
4425	if (st->frag_data) {
4426	kunmap_atomic(st->frag_data);
4427	st->frag_data = NULL;
4428	}
4429
4430	if (st->root_skb == st->cur_skb && skb_has_frag_list(skb: st->root_skb)) {
4431	st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
4432	st->frag_idx = `0`;
4433	goto next_skb;
4434	} else if (st->cur_skb->next) {
4435	st->cur_skb = st->cur_skb->next;
4436	st->frag_idx = `0`;
4437	goto next_skb;
4438	}
4439
4440	return `0`;
4441	}
4442	EXPORT_SYMBOL(skb_seq_read);
4443
4444	/**
4445	* skb_abort_seq_read - Abort a sequential read of skb data
4446	* @st: state variable
4447	*
4448	* Must be called if skb_seq_read() was not called until it
4449	* returned 0.
4450	*/
4451	void skb_abort_seq_read(struct skb_seq_state *st)
4452	{
4453	if (st->frag_data)
4454	kunmap_atomic(st->frag_data);
4455	}
4456	EXPORT_SYMBOL(skb_abort_seq_read);
4457
4458	/**
4459	* skb_copy_seq_read() - copy from a skb_seq_state to a buffer
4460	* @st: source skb_seq_state
4461	* @offset: offset in source
4462	* @to: destination buffer
4463	* @len: number of bytes to copy
4464	*
4465	* Copy @len bytes from @offset bytes into the source @st to the destination
4466	* buffer @to. `offset` should increase (or be unchanged) with each subsequent
4467	* call to this function. If offset needs to decrease from the previous use `st`
4468	* should be reset first.
4469	*
4470	* Return: 0 on success or -EINVAL if the copy ended early
4471	*/
4472	int skb_copy_seq_read(struct skb_seq_state st, int* offset, void to, int* len)
4473	{
4474	const u8 *data;
4475	u32 sqlen;
4476
4477	for (;;) {
4478	sqlen = skb_seq_read(offset, &data, st);
4479	if (sqlen == `0`)
4480	return -EINVAL;
4481	if (sqlen >= len) {
4482	memcpy(to, data, len);
4483	return `0`;
4484	}
4485	memcpy(to, data, sqlen);
4486	to += sqlen;
4487	offset += sqlen;
4488	len -= sqlen;
4489	}
4490	}
4491	EXPORT_SYMBOL(skb_copy_seq_read);
4492
4493	#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
4494
4495	static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
4496	struct ts_config *conf,
4497	struct ts_state *state)
4498	{
4499	return skb_seq_read(offset, text, TS_SKB_CB(state));
4500	}
4501
4502	static void skb_ts_finish(struct ts_config conf, struct* ts_state *state)
4503	{
4504	skb_abort_seq_read(TS_SKB_CB(state));
4505	}
4506
4507	/**
4508	* skb_find_text - Find a text pattern in skb data
4509	* @skb: the buffer to look in
4510	* @from: search offset
4511	* @to: search limit
4512	* @config: textsearch configuration
4513	*
4514	* Finds a pattern in the skb data according to the specified
4515	* textsearch configuration. Use textsearch_next() to retrieve
4516	* subsequent occurrences of the pattern. Returns the offset
4517	* to the first occurrence or UINT_MAX if no match was found.
4518	*/
4519	unsigned int skb_find_text(struct sk_buff skb, unsigned* int from,
4520	unsigned int to, struct ts_config *config)
4521	{
4522	unsigned int patlen = config->ops->get_pattern_len(config);
4523	struct ts_state state;
4524	unsigned int ret;
4525
4526	BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb));
4527
4528	config->get_next_block = skb_ts_get_next_block;
4529	config->finish = skb_ts_finish;
4530
4531	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
4532
4533	ret = textsearch_find(conf: config, state: &state);
4534	return (ret + patlen <= to - from ? ret : UINT_MAX);
4535	}
4536	EXPORT_SYMBOL(skb_find_text);
4537
4538	int skb_append_pagefrags(struct sk_buff skb, struct* page *page,
4539	int offset, size_t size, size_t max_frags)
4540	{
4541	int i = skb_shinfo(skb)->nr_frags;
4542
4543	if (skb_can_coalesce(skb, i, page, off: offset)) {
4544	skb_frag_size_add(frag: &skb_shinfo(skb)->frags[i - `1`], delta: size);
4545	} else if (i < max_frags) {
4546	skb_zcopy_downgrade_managed(skb);
4547	get_page(page);
4548	skb_fill_page_desc_noacc(skb, i, page, off: offset, size);
4549	} else {
4550	return -EMSGSIZE;
4551	}
4552
4553	return `0`;
4554	}
4555	EXPORT_SYMBOL_GPL(skb_append_pagefrags);
4556
4557	/**
4558	* skb_pull_rcsum - pull skb and update receive checksum
4559	* @skb: buffer to update
4560	* @len: length of data pulled
4561	*
4562	* This function performs an skb_pull on the packet and updates
4563	* the CHECKSUM_COMPLETE checksum. It should be used on
4564	* receive path processing instead of skb_pull unless you know
4565	* that the checksum difference is zero (e.g., a valid IP header)
4566	* or you are setting ip_summed to CHECKSUM_NONE.
4567	*/
4568	void skb_pull_rcsum(struct* sk_buff skb, unsigned* int len)
4569	{
4570	unsigned char *data = skb->data;
4571
4572	BUG_ON(len > skb->len);
4573	__skb_pull(skb, len);
4574	skb_postpull_rcsum(skb, start: data, len);
4575	return skb->data;
4576	}
4577	EXPORT_SYMBOL_GPL(skb_pull_rcsum);
4578
4579	static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
4580	{
4581	skb_frag_t head_frag;
4582	struct page *page;
4583
4584	page = virt_to_head_page(x: frag_skb->head);
4585	skb_frag_fill_page_desc(frag: &head_frag, page, off: frag_skb->data -
4586	(unsigned char *)page_address(page),
4587	size: skb_headlen(skb: frag_skb));
4588	return head_frag;
4589	}
4590
4591	struct sk_buff skb_segment_list(struct* sk_buff *skb,
4592	netdev_features_t features,
4593	unsigned int offset)
4594	{
4595	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
4596	unsigned int tnl_hlen = skb_tnl_header_len(inner_skb: skb);
4597	unsigned int delta_truesize = `0`;
4598	unsigned int delta_len = `0`;
4599	struct sk_buff *tail = NULL;
4600	struct sk_buff nskb, tmp;
4601	int len_diff, err;
4602
4603	skb_push(skb, -skb_network_offset(skb) + offset);
4604
4605	/ Ensure the head is writeable before touching the shared info /
4606	err = skb_unclone(skb, GFP_ATOMIC);
4607	if (err)
4608	goto err_linearize;
4609
4610	skb_shinfo(skb)->frag_list = NULL;
4611
4612	while (list_skb) {
4613	nskb = list_skb;
4614	list_skb = list_skb->next;
4615
4616	err = `0`;
4617	delta_truesize += nskb->truesize;
4618	if (skb_shared(skb: nskb)) {
4619	tmp = skb_clone(nskb, GFP_ATOMIC);
4620	if (tmp) {
4621	consume_skb(nskb);
4622	nskb = tmp;
4623	err = skb_unclone(skb: nskb, GFP_ATOMIC);
4624	} else {
4625	err = -ENOMEM;
4626	}
4627	}
4628
4629	if (!tail)
4630	skb->next = nskb;
4631	else
4632	tail->next = nskb;
4633
4634	if (unlikely(err)) {
4635	nskb->next = list_skb;
4636	goto err_linearize;
4637	}
4638
4639	tail = nskb;
4640
4641	delta_len += nskb->len;
4642
4643	skb_push(nskb, -skb_network_offset(skb: nskb) + offset);
4644
4645	skb_release_head_state(skb: nskb);
4646	len_diff = skb_network_header_len(skb: nskb) - skb_network_header_len(skb);
4647	__copy_skb_header(new: nskb, old: skb);
4648
4649	skb_headers_offset_update(nskb, skb_headroom(skb: nskb) - skb_headroom(skb));
4650	nskb->transport_header += len_diff;
4651	skb_copy_from_linear_data_offset(skb, offset: -tnl_hlen,
4652	to: nskb->data - tnl_hlen,
4653	len: offset + tnl_hlen);
4654
4655	if (skb_needs_linearize(skb: nskb, features) &&
4656	__skb_linearize(skb: nskb))
4657	goto err_linearize;
4658	}
4659
4660	skb->truesize = skb->truesize - delta_truesize;
4661	skb->data_len = skb->data_len - delta_len;
4662	skb->len = skb->len - delta_len;
4663
4664	skb_gso_reset(skb);
4665
4666	skb->prev = tail;
4667
4668	if (skb_needs_linearize(skb, features) &&
4669	__skb_linearize(skb))
4670	goto err_linearize;
4671
4672	skb_get(skb);
4673
4674	return skb;
4675
4676	err_linearize:
4677	kfree_skb_list(segs: skb->next);
4678	skb->next = NULL;
4679	return ERR_PTR(error: -ENOMEM);
4680	}
4681	EXPORT_SYMBOL_GPL(skb_segment_list);
4682
4683	/**
4684	* skb_segment - Perform protocol segmentation on skb.
4685	* @head_skb: buffer to segment
4686	* @features: features for the output path (see dev->features)
4687	*
4688	* This function performs segmentation on the given skb. It returns
4689	* a pointer to the first in a list of new skbs for the segments.
4690	* In case of error it returns ERR_PTR(err).
4691	*/
4692	struct sk_buff skb_segment(struct* sk_buff *head_skb,
4693	netdev_features_t features)
4694	{
4695	struct sk_buff *segs = NULL;
4696	struct sk_buff *tail = NULL;
4697	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
4698	unsigned int mss = skb_shinfo(head_skb)->gso_size;
4699	unsigned int doffset = head_skb->data - skb_mac_header(skb: head_skb);
4700	unsigned int offset = doffset;
4701	unsigned int tnl_hlen = skb_tnl_header_len(inner_skb: head_skb);
4702	unsigned int partial_segs = `0`;
4703	unsigned int headroom;
4704	unsigned int len = head_skb->len;
4705	struct sk_buff *frag_skb;
4706	skb_frag_t *frag;
4707	__be16 proto;
4708	bool csum, sg;
4709	int err = -ENOMEM;
4710	int i = `0`;
4711	int nfrags, pos;
4712
4713	if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
4714	mss != GSO_BY_FRAGS && mss != skb_headlen(skb: head_skb)) {
4715	struct sk_buff *check_skb;
4716
4717	for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
4718	if (skb_headlen(skb: check_skb) && !check_skb->head_frag) {
4719	/ gso_size is untrusted, and we have a frag_list with*
4720	* a linear non head_frag item.
4721	*
4722	* If head_skb's headlen does not fit requested gso_size,
4723	* it means that the frag_list members do NOT terminate
4724	* on exact gso_size boundaries. Hence we cannot perform
4725	* skb_frag_t page sharing. Therefore we must fallback to
4726	* copying the frag_list skbs; we do so by disabling SG.
4727	*/
4728	features &= ~NETIF_F_SG;
4729	break;
4730	}
4731	}
4732	}
4733
4734	__skb_push(skb: head_skb, len: doffset);
4735	proto = skb_network_protocol(skb: head_skb, NULL);
4736	if (unlikely(!proto))
4737	return ERR_PTR(error: -EINVAL);
4738
4739	sg = !!(features & NETIF_F_SG);
4740	csum = !!can_checksum_protocol(features, protocol: proto);
4741
4742	if (sg && csum && (mss != GSO_BY_FRAGS)) {
4743	if (!(features & NETIF_F_GSO_PARTIAL)) {
4744	struct sk_buff *iter;
4745	unsigned int frag_len;
4746
4747	if (!list_skb \|\|
4748	!net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
4749	goto normal;
4750
4751	/ If we get here then all the required*
4752	* GSO features except frag_list are supported.
4753	* Try to split the SKB to multiple GSO SKBs
4754	* with no frag_list.
4755	* Currently we can do that only when the buffers don't
4756	* have a linear part and all the buffers except
4757	* the last are of the same length.
4758	*/
4759	frag_len = list_skb->len;
4760	skb_walk_frags(head_skb, iter) {
4761	if (frag_len != iter->len && iter->next)
4762	goto normal;
4763	if (skb_headlen(skb: iter) && !iter->head_frag)
4764	goto normal;
4765
4766	len -= iter->len;
4767	}
4768
4769	if (len != frag_len)
4770	goto normal;
4771	}
4772
4773	/ GSO partial only requires that we trim off any excess that*
4774	* doesn't fit into an MSS sized block, so take care of that
4775	* now.
4776	* Cap len to not accidentally hit GSO_BY_FRAGS.
4777	*/
4778	partial_segs = min(len, GSO_BY_FRAGS - `1`) / mss;
4779	if (partial_segs > `1`)
4780	mss *= partial_segs;
4781	else
4782	partial_segs = `0`;
4783	}
4784
4785	normal:
4786	headroom = skb_headroom(skb: head_skb);
4787	pos = skb_headlen(skb: head_skb);
4788
4789	if (skb_orphan_frags(skb: head_skb, GFP_ATOMIC))
4790	return ERR_PTR(error: -ENOMEM);
4791
4792	nfrags = skb_shinfo(head_skb)->nr_frags;
4793	frag = skb_shinfo(head_skb)->frags;
4794	frag_skb = head_skb;
4795
4796	do {
4797	struct sk_buff *nskb;
4798	skb_frag_t *nskb_frag;
4799	int hsize;
4800	int size;
4801
4802	if (unlikely(mss == GSO_BY_FRAGS)) {
4803	len = list_skb->len;
4804	} else {
4805	len = head_skb->len - offset;
4806	if (len > mss)
4807	len = mss;
4808	}
4809
4810	hsize = skb_headlen(skb: head_skb) - offset;
4811
4812	if (hsize <= `0` && i >= nfrags && skb_headlen(skb: list_skb) &&
4813	(skb_headlen(skb: list_skb) == len \|\| sg)) {
4814	BUG_ON(skb_headlen(list_skb) > len);
4815
4816	nskb = skb_clone(list_skb, GFP_ATOMIC);
4817	if (unlikely(!nskb))
4818	goto err;
4819
4820	i = `0`;
4821	nfrags = skb_shinfo(list_skb)->nr_frags;
4822	frag = skb_shinfo(list_skb)->frags;
4823	frag_skb = list_skb;
4824	pos += skb_headlen(skb: list_skb);
4825
4826	while (pos < offset + len) {
4827	BUG_ON(i >= nfrags);
4828
4829	size = skb_frag_size(frag);
4830	if (pos + size > offset + len)
4831	break;
4832
4833	i++;
4834	pos += size;
4835	frag++;
4836	}
4837
4838	list_skb = list_skb->next;
4839
4840	if (unlikely(pskb_trim(nskb, len))) {
4841	kfree_skb(skb: nskb);
4842	goto err;
4843	}
4844
4845	hsize = skb_end_offset(skb: nskb);
4846	if (skb_cow_head(skb: nskb, headroom: doffset + headroom)) {
4847	kfree_skb(skb: nskb);
4848	goto err;
4849	}
4850
4851	nskb->truesize += skb_end_offset(skb: nskb) - hsize;
4852	skb_release_head_state(skb: nskb);
4853	__skb_push(skb: nskb, len: doffset);
4854	} else {
4855	if (hsize < `0`)
4856	hsize = `0`;
4857	if (hsize > len \|\| !sg)
4858	hsize = len;
4859
4860	nskb = __alloc_skb(hsize + doffset + headroom,
4861	GFP_ATOMIC, skb_alloc_rx_flag(skb: head_skb),
4862	NUMA_NO_NODE);
4863
4864	if (unlikely(!nskb))
4865	goto err;
4866
4867	skb_reserve(skb: nskb, len: headroom);
4868	__skb_put(skb: nskb, len: doffset);
4869	}
4870
4871	if (segs)
4872	tail->next = nskb;
4873	else
4874	segs = nskb;
4875	tail = nskb;
4876
4877	__copy_skb_header(new: nskb, old: head_skb);
4878
4879	skb_headers_offset_update(nskb, skb_headroom(skb: nskb) - headroom);
4880	skb_reset_mac_len(skb: nskb);
4881
4882	skb_copy_from_linear_data_offset(skb: head_skb, offset: -tnl_hlen,
4883	to: nskb->data - tnl_hlen,
4884	len: doffset + tnl_hlen);
4885
4886	if (nskb->len == len + doffset)
4887	goto perform_csum_check;
4888
4889	if (!sg) {
4890	if (!csum) {
4891	if (!nskb->remcsum_offload)
4892	nskb->ip_summed = CHECKSUM_NONE;
4893	SKB_GSO_CB(nskb)->csum =
4894	skb_copy_and_csum_bits(head_skb, offset,
4895	skb_put(nskb,
4896	len),
4897	len);
4898	SKB_GSO_CB(nskb)->csum_start =
4899	skb_headroom(skb: nskb) + doffset;
4900	} else {
4901	if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
4902	goto err;
4903	}
4904	continue;
4905	}
4906
4907	nskb_frag = skb_shinfo(nskb)->frags;
4908
4909	skb_copy_from_linear_data_offset(skb: head_skb, offset,
4910	to: skb_put(nskb, hsize), len: hsize);
4911
4912	skb_shinfo(nskb)->flags \|= skb_shinfo(head_skb)->flags &
4913	SKBFL_SHARED_FRAG;
4914
4915	if (skb_zerocopy_clone(nskb, orig: frag_skb, GFP_ATOMIC))
4916	goto err;
4917
4918	while (pos < offset + len) {
4919	if (i >= nfrags) {
4920	if (skb_orphan_frags(skb: list_skb, GFP_ATOMIC) \|\|
4921	skb_zerocopy_clone(nskb, orig: list_skb,
4922	GFP_ATOMIC))
4923	goto err;
4924
4925	i = `0`;
4926	nfrags = skb_shinfo(list_skb)->nr_frags;
4927	frag = skb_shinfo(list_skb)->frags;
4928	frag_skb = list_skb;
4929	if (!skb_headlen(skb: list_skb)) {
4930	BUG_ON(!nfrags);
4931	} else {
4932	BUG_ON(!list_skb->head_frag);
4933
4934	/ to make room for head_frag. /
4935	i--;
4936	frag--;
4937	}
4938
4939	list_skb = list_skb->next;
4940	}
4941
4942	if (unlikely(skb_shinfo(nskb)->nr_frags >=
4943	MAX_SKB_FRAGS)) {
4944	net_warn_ratelimited(
4945	"skb_segment: too many frags: %u %u\n",
4946	pos, mss);
4947	err = -EINVAL;
4948	goto err;
4949	}
4950
4951	nskb_frag = (i < `0`) ? skb_head_frag_to_page_desc(frag_skb) : frag;
4952	__skb_frag_ref(frag: nskb_frag);
4953	size = skb_frag_size(frag: nskb_frag);
4954
4955	if (pos < offset) {
4956	skb_frag_off_add(frag: nskb_frag, delta: offset - pos);
4957	skb_frag_size_sub(frag: nskb_frag, delta: offset - pos);
4958	}
4959
4960	skb_shinfo(nskb)->nr_frags++;
4961
4962	if (pos + size <= offset + len) {
4963	i++;
4964	frag++;
4965	pos += size;
4966	} else {
4967	skb_frag_size_sub(frag: nskb_frag, delta: pos + size - (offset + len));
4968	goto skip_fraglist;
4969	}
4970
4971	nskb_frag++;
4972	}
4973
4974	skip_fraglist:
4975	nskb->data_len = len - hsize;
4976	nskb->len += nskb->data_len;
4977	nskb->truesize += nskb->data_len;
4978
4979	perform_csum_check:
4980	if (!csum) {
4981	if (skb_has_shared_frag(skb: nskb) &&
4982	__skb_linearize(skb: nskb))
4983	goto err;
4984
4985	if (!nskb->remcsum_offload)
4986	nskb->ip_summed = CHECKSUM_NONE;
4987	SKB_GSO_CB(nskb)->csum =
4988	skb_checksum(nskb, doffset,
4989	nskb->len - doffset, `0`);
4990	SKB_GSO_CB(nskb)->csum_start =
4991	skb_headroom(skb: nskb) + doffset;
4992	}
4993	} while ((offset += len) < head_skb->len);
4994
4995	/ Some callers want to get the end of the list.*
4996	* Put it in segs->prev to avoid walking the list.
4997	* (see validate_xmit_skb_list() for example)
4998	*/
4999	segs->prev = tail;
5000
5001	if (partial_segs) {
5002	struct sk_buff *iter;
5003	int type = skb_shinfo(head_skb)->gso_type;
5004	unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
5005
5006	/ Update type to add partial and then remove dodgy if set /
5007	type \|= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
5008	type &= ~SKB_GSO_DODGY;
5009
5010	/ Update GSO info and prepare to start updating headers on*
5011	* our way back down the stack of protocols.
5012	*/
5013	for (iter = segs; iter; iter = iter->next) {
5014	skb_shinfo(iter)->gso_size = gso_size;
5015	skb_shinfo(iter)->gso_segs = partial_segs;
5016	skb_shinfo(iter)->gso_type = type;
5017	SKB_GSO_CB(iter)->data_offset = skb_headroom(skb: iter) + doffset;
5018	}
5019
5020	if (tail->len - doffset <= gso_size)
5021	skb_shinfo(tail)->gso_size = `0`;
5022	else if (tail != segs)
5023	skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
5024	}
5025
5026	/ Following permits correct backpressure, for protocols*
5027	* using skb_set_owner_w().
5028	* Idea is to tranfert ownership from head_skb to last segment.
5029	*/
5030	if (head_skb->destructor == sock_wfree) {
5031	swap(tail->truesize, head_skb->truesize);
5032	swap(tail->destructor, head_skb->destructor);
5033	swap(tail->sk, head_skb->sk);
5034	}
5035	return segs;
5036
5037	err:
5038	kfree_skb_list(segs);
5039	return ERR_PTR(error: err);
5040	}
5041	EXPORT_SYMBOL_GPL(skb_segment);
5042
5043	#ifdef CONFIG_SKB_EXTENSIONS
5044	#define SKB_EXT_ALIGN_VALUE 8
5045	#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
5046
5047	static const u8 skb_ext_type_len[] = {
5048	#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
5049	[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
5050	#endif
5051	#ifdef CONFIG_XFRM
5052	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
5053	#endif
5054	#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5055	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
5056	#endif
5057	#if IS_ENABLED(CONFIG_MPTCP)
5058	[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
5059	#endif
5060	#if IS_ENABLED(CONFIG_MCTP_FLOWS)
5061	[SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
5062	#endif
5063	};
5064
5065	static __always_inline unsigned int skb_ext_total_length(void)
5066	{
5067	unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext);
5068	int i;
5069
5070	for (i = `0`; i < ARRAY_SIZE(skb_ext_type_len); i++)
5071	l += skb_ext_type_len[i];
5072
5073	return l;
5074	}
5075
5076	static void skb_extensions_init(void)
5077	{
5078	BUILD_BUG_ON(SKB_EXT_NUM >= `8`);
5079	#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL)
5080	BUILD_BUG_ON(skb_ext_total_length() > `255`);
5081	#endif
5082
5083	skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
5084	SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
5085	`0`,
5086	SLAB_HWCACHE_ALIGN\|SLAB_PANIC,
5087	NULL);
5088	}
5089	#else
5090	static void skb_extensions_init(void) {}
5091	#endif
5092
5093	/ The SKB kmem_cache slab is critical for network performance. Never*
5094	* merge/alias the slab with similar sized objects. This avoids fragmentation
5095	* that hurts performance of kmem_cache_{alloc,free}_bulk APIs.
5096	*/
5097	#ifndef CONFIG_SLUB_TINY
5098	#define FLAG_SKB_NO_MERGE SLAB_NO_MERGE
5099	#else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */
5100	#define FLAG_SKB_NO_MERGE 0
5101	#endif
5102
5103	void __init skb_init(void)
5104	{
5105	net_hotdata.skbuff_cache = kmem_cache_create_usercopy(name: "skbuff_head_cache",
5106	size: sizeof(struct sk_buff),
5107	align: `0`,
5108	SLAB_HWCACHE_ALIGN\|SLAB_PANIC\|
5109	FLAG_SKB_NO_MERGE,
5110	offsetof(struct sk_buff, cb),
5111	sizeof_field(struct sk_buff, cb),
5112	NULL);
5113	net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
5114	sizeof(struct sk_buff_fclones),
5115	`0`,
5116	SLAB_HWCACHE_ALIGN\|SLAB_PANIC,
5117	NULL);
5118	/ usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes.*
5119	* struct skb_shared_info is located at the end of skb->head,
5120	* and should not be copied to/from user.
5121	*/
5122	net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy(name: "skbuff_small_head",
5123	SKB_SMALL_HEAD_CACHE_SIZE,
5124	align: `0`,
5125	SLAB_HWCACHE_ALIGN \| SLAB_PANIC,
5126	useroffset: `0`,
5127	SKB_SMALL_HEAD_HEADROOM,
5128	NULL);
5129	skb_extensions_init();
5130	}
5131
5132	static int
5133	__skb_to_sgvec(struct sk_buff skb, struct* scatterlist sg, int* offset, int len,
5134	unsigned int recursion_level)
5135	{
5136	int start = skb_headlen(skb);
5137	int i, copy = start - offset;
5138	struct sk_buff *frag_iter;
5139	int elt = `0`;
5140
5141	if (unlikely(recursion_level >= `24`))
5142	return -EMSGSIZE;
5143
5144	if (copy > `0`) {
5145	if (copy > len)
5146	copy = len;
5147	sg_set_buf(sg, buf: skb->data + offset, buflen: copy);
5148	elt++;
5149	if ((len -= copy) == `0`)
5150	return elt;
5151	offset += copy;
5152	}
5153
5154	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
5155	int end;
5156
5157	WARN_ON(start > offset + len);
5158
5159	end = start + skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
5160	if ((copy = end - offset) > `0`) {
5161	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
5162	if (unlikely(elt && sg_is_last(&sg[elt - `1`])))
5163	return -EMSGSIZE;
5164
5165	if (copy > len)
5166	copy = len;
5167	sg_set_page(sg: &sg[elt], page: skb_frag_page(frag), len: copy,
5168	offset: skb_frag_off(frag) + offset - start);
5169	elt++;
5170	if (!(len -= copy))
5171	return elt;
5172	offset += copy;
5173	}
5174	start = end;
5175	}
5176
5177	skb_walk_frags(skb, frag_iter) {
5178	int end, ret;
5179
5180	WARN_ON(start > offset + len);
5181
5182	end = start + frag_iter->len;
5183	if ((copy = end - offset) > `0`) {
5184	if (unlikely(elt && sg_is_last(&sg[elt - `1`])))
5185	return -EMSGSIZE;
5186
5187	if (copy > len)
5188	copy = len;
5189	ret = __skb_to_sgvec(skb: frag_iter, sg: sg+elt, offset: offset - start,
5190	len: copy, recursion_level: recursion_level + `1`);
5191	if (unlikely(ret < `0`))
5192	return ret;
5193	elt += ret;
5194	if ((len -= copy) == `0`)
5195	return elt;
5196	offset += copy;
5197	}
5198	start = end;
5199	}
5200	BUG_ON(len);
5201	return elt;
5202	}
5203
5204	/**
5205	* skb_to_sgvec - Fill a scatter-gather list from a socket buffer
5206	* @skb: Socket buffer containing the buffers to be mapped
5207	* @sg: The scatter-gather list to map into
5208	* @offset: The offset into the buffer's contents to start mapping
5209	* @len: Length of buffer space to be mapped
5210	*
5211	* Fill the specified scatter-gather list with mappings/pointers into a
5212	* region of the buffer space attached to a socket buffer. Returns either
5213	* the number of scatterlist items used, or -EMSGSIZE if the contents
5214	* could not fit.
5215	*/
5216	int skb_to_sgvec(struct sk_buff skb, struct* scatterlist sg, int* offset, int len)
5217	{
5218	int nsg = __skb_to_sgvec(skb, sg, offset, len, recursion_level: `0`);
5219
5220	if (nsg <= `0`)
5221	return nsg;
5222
5223	sg_mark_end(sg: &sg[nsg - `1`]);
5224
5225	return nsg;
5226	}
5227	EXPORT_SYMBOL_GPL(skb_to_sgvec);
5228
5229	/ As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given*
5230	* sglist without mark the sg which contain last skb data as the end.
5231	* So the caller can mannipulate sg list as will when padding new data after
5232	* the first call without calling sg_unmark_end to expend sg list.
5233	*
5234	* Scenario to use skb_to_sgvec_nomark:
5235	* 1. sg_init_table
5236	* 2. skb_to_sgvec_nomark(payload1)
5237	* 3. skb_to_sgvec_nomark(payload2)
5238	*
5239	* This is equivalent to:
5240	* 1. sg_init_table
5241	* 2. skb_to_sgvec(payload1)
5242	* 3. sg_unmark_end
5243	* 4. skb_to_sgvec(payload2)
5244	*
5245	* When mapping multiple payload conditionally, skb_to_sgvec_nomark
5246	* is more preferable.
5247	*/
5248	int skb_to_sgvec_nomark(struct sk_buff skb, struct* scatterlist *sg,
5249	int offset, int len)
5250	{
5251	return __skb_to_sgvec(skb, sg, offset, len, recursion_level: `0`);
5252	}
5253	EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
5254
5255
5256
5257	/**
5258	* skb_cow_data - Check that a socket buffer's data buffers are writable
5259	* @skb: The socket buffer to check.
5260	* @tailbits: Amount of trailing space to be added
5261	* @trailer: Returned pointer to the skb where the @tailbits space begins
5262	*
5263	* Make sure that the data buffers attached to a socket buffer are
5264	* writable. If they are not, private copies are made of the data buffers
5265	* and the socket buffer is set to use these instead.
5266	*
5267	* If @tailbits is given, make sure that there is space to write @tailbits
5268	* bytes of data beyond current end of socket buffer. @trailer will be
5269	* set to point to the skb in which this space begins.
5270	*
5271	* The number of scatterlist elements required to completely map the
5272	* COW'd and extended socket buffer will be returned.
5273	*/
5274	int skb_cow_data(struct sk_buff skb, int* tailbits, struct sk_buff **trailer)
5275	{
5276	int copyflag;
5277	int elt;
5278	struct sk_buff skb1, *skb_p;
5279
5280	/ If skb is cloned or its head is paged, reallocate*
5281	* head pulling out all the pages (pages are considered not writable
5282	* at the moment even if they are anonymous).
5283	*/
5284	if ((skb_cloned(skb) \|\| skb_shinfo(skb)->nr_frags) &&
5285	!__pskb_pull_tail(skb, __skb_pagelen(skb)))
5286	return -ENOMEM;
5287
5288	/ Easy case. Most of packets will go this way. /
5289	if (!skb_has_frag_list(skb)) {
5290	/ A little of trouble, not enough of space for trailer.*
5291	* This should not happen, when stack is tuned to generate
5292	* good frames. OK, on miss we reallocate and reserve even more
5293	* space, 128 bytes is fair. */
5294
5295	if (skb_tailroom(skb) < tailbits &&
5296	pskb_expand_head(skb, `0`, tailbits-skb_tailroom(skb)+`128`, GFP_ATOMIC))
5297	return -ENOMEM;
5298
5299	/ Voila! /
5300	*trailer = skb;
5301	return `1`;
5302	}
5303
5304	/ Misery. We are in troubles, going to mincer fragments... /
5305
5306	elt = `1`;
5307	skb_p = &skb_shinfo(skb)->frag_list;
5308	copyflag = `0`;
5309
5310	while ((skb1 = *skb_p) != NULL) {
5311	int ntail = `0`;
5312
5313	/ The fragment is partially pulled by someone,*
5314	* this can happen on input. Copy it and everything
5315	* after it. */
5316
5317	if (skb_shared(skb: skb1))
5318	copyflag = `1`;
5319
5320	/ If the skb is the last, worry about trailer. /
5321
5322	if (skb1->next == NULL && tailbits) {
5323	if (skb_shinfo(skb1)->nr_frags \|\|
5324	skb_has_frag_list(skb: skb1) \|\|
5325	skb_tailroom(skb: skb1) < tailbits)
5326	ntail = tailbits + `128`;
5327	}
5328
5329	if (copyflag \|\|
5330	skb_cloned(skb: skb1) \|\|
5331	ntail \|\|
5332	skb_shinfo(skb1)->nr_frags \|\|
5333	skb_has_frag_list(skb: skb1)) {
5334	struct sk_buff *skb2;
5335
5336	/ Fuck, we are miserable poor guys... /
5337	if (ntail == `0`)
5338	skb2 = skb_copy(skb1, GFP_ATOMIC);
5339	else
5340	skb2 = skb_copy_expand(skb1,
5341	skb_headroom(skb: skb1),
5342	ntail,
5343	GFP_ATOMIC);
5344	if (unlikely(skb2 == NULL))
5345	return -ENOMEM;
5346
5347	if (skb1->sk)
5348	skb_set_owner_w(skb: skb2, sk: skb1->sk);
5349
5350	/ Looking around. Are we still alive?*
5351	* OK, link new skb, drop old one */
5352
5353	skb2->next = skb1->next;
5354	*skb_p = skb2;
5355	kfree_skb(skb: skb1);
5356	skb1 = skb2;
5357	}
5358	elt++;
5359	*trailer = skb1;
5360	skb_p = &skb1->next;
5361	}
5362
5363	return elt;
5364	}
5365	EXPORT_SYMBOL_GPL(skb_cow_data);
5366
5367	static void sock_rmem_free(struct sk_buff *skb)
5368	{
5369	struct sock *sk = skb->sk;
5370
5371	atomic_sub(i: skb->truesize, v: &sk->sk_rmem_alloc);
5372	}
5373
5374	static void skb_set_err_queue(struct sk_buff *skb)
5375	{
5376	/ pkt_type of skbs received on local sockets is never PACKET_OUTGOING.*
5377	* So, it is safe to (mis)use it to mark skbs on the error queue.
5378	*/
5379	skb->pkt_type = PACKET_OUTGOING;
5380	BUILD_BUG_ON(PACKET_OUTGOING == `0`);
5381	}
5382
5383	/*
5384	* Note: We dont mem charge error packets (no sk_forward_alloc changes)
5385	*/
5386	int sock_queue_err_skb(struct sock sk, struct* sk_buff *skb)
5387	{
5388	if (atomic_read(v: &sk->sk_rmem_alloc) + skb->truesize >=
5389	(unsigned int)READ_ONCE(sk->sk_rcvbuf))
5390	return -ENOMEM;
5391
5392	skb_orphan(skb);
5393	skb->sk = sk;
5394	skb->destructor = sock_rmem_free;
5395	atomic_add(i: skb->truesize, v: &sk->sk_rmem_alloc);
5396	skb_set_err_queue(skb);
5397
5398	/ before exiting rcu section, make sure dst is refcounted /
5399	skb_dst_force(skb);
5400
5401	skb_queue_tail(&sk->sk_error_queue, skb);
5402	if (!sock_flag(sk, flag: SOCK_DEAD))
5403	sk_error_report(sk);
5404	return `0`;
5405	}
5406	EXPORT_SYMBOL(sock_queue_err_skb);
5407
5408	static bool is_icmp_err_skb(const struct sk_buff *skb)
5409	{
5410	return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP \|\|
5411	SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
5412	}
5413
5414	struct sk_buff sock_dequeue_err_skb(struct* sock *sk)
5415	{
5416	struct sk_buff_head *q = &sk->sk_error_queue;
5417	struct sk_buff skb, skb_next = NULL;
5418	bool icmp_next = false;
5419	unsigned long flags;
5420
5421	if (skb_queue_empty_lockless(list: q))
5422	return NULL;
5423
5424	spin_lock_irqsave(&q->lock, flags);
5425	skb = __skb_dequeue(list: q);
5426	if (skb && (skb_next = skb_peek(list_: q))) {
5427	icmp_next = is_icmp_err_skb(skb: skb_next);
5428	if (icmp_next)
5429	sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
5430	}
5431	spin_unlock_irqrestore(lock: &q->lock, flags);
5432
5433	if (is_icmp_err_skb(skb) && !icmp_next)
5434	sk->sk_err = `0`;
5435
5436	if (skb_next)
5437	sk_error_report(sk);
5438
5439	return skb;
5440	}
5441	EXPORT_SYMBOL(sock_dequeue_err_skb);
5442
5443	/**
5444	* skb_clone_sk - create clone of skb, and take reference to socket
5445	* @skb: the skb to clone
5446	*
5447	* This function creates a clone of a buffer that holds a reference on
5448	* sk_refcnt. Buffers created via this function are meant to be
5449	* returned using sock_queue_err_skb, or free via kfree_skb.
5450	*
5451	* When passing buffers allocated with this function to sock_queue_err_skb
5452	* it is necessary to wrap the call with sock_hold/sock_put in order to
5453	* prevent the socket from being released prior to being enqueued on
5454	* the sk_error_queue.
5455	*/
5456	struct sk_buff skb_clone_sk(struct* sk_buff *skb)
5457	{
5458	struct sock *sk = skb->sk;
5459	struct sk_buff *clone;
5460
5461	if (!sk \|\| !refcount_inc_not_zero(r: &sk->sk_refcnt))
5462	return NULL;
5463
5464	clone = skb_clone(skb, GFP_ATOMIC);
5465	if (!clone) {
5466	sock_put(sk);
5467	return NULL;
5468	}
5469
5470	clone->sk = sk;
5471	clone->destructor = sock_efree;
5472
5473	return clone;
5474	}
5475	EXPORT_SYMBOL(skb_clone_sk);
5476
5477	static void __skb_complete_tx_timestamp(struct sk_buff *skb,
5478	struct sock *sk,
5479	int tstype,
5480	bool opt_stats)
5481	{
5482	struct sock_exterr_skb *serr;
5483	int err;
5484
5485	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
5486
5487	serr = SKB_EXT_ERR(skb);
5488	memset(serr, `0`, sizeof(*serr));
5489	serr->ee.ee_errno = ENOMSG;
5490	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
5491	serr->ee.ee_info = tstype;
5492	serr->opt_stats = opt_stats;
5493	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : `0`;
5494	if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
5495	serr->ee.ee_data = skb_shinfo(skb)->tskey;
5496	if (sk_is_tcp(sk))
5497	serr->ee.ee_data -= atomic_read(v: &sk->sk_tskey);
5498	}
5499
5500	err = sock_queue_err_skb(sk, skb);
5501
5502	if (err)
5503	kfree_skb(skb);
5504	}
5505
5506	static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
5507	{
5508	bool ret;
5509
5510	if (likely(tsonly \|\| READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data)))
5511	return true;
5512
5513	read_lock_bh(&sk->sk_callback_lock);
5514	ret = sk->sk_socket && sk->sk_socket->file &&
5515	file_ns_capable(file: sk->sk_socket->file, ns: &init_user_ns, CAP_NET_RAW);
5516	read_unlock_bh(&sk->sk_callback_lock);
5517	return ret;
5518	}
5519
5520	void skb_complete_tx_timestamp(struct sk_buff *skb,
5521	struct skb_shared_hwtstamps *hwtstamps)
5522	{
5523	struct sock *sk = skb->sk;
5524
5525	if (!skb_may_tx_timestamp(sk, tsonly: false))
5526	goto err;
5527
5528	/ Take a reference to prevent skb_orphan() from freeing the socket,*
5529	* but only if the socket refcount is not zero.
5530	*/
5531	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
5532	skb_hwtstamps(skb) = hwtstamps;
5533	__skb_complete_tx_timestamp(skb, sk, tstype: SCM_TSTAMP_SND, opt_stats: false);
5534	sock_put(sk);
5535	return;
5536	}
5537
5538	err:
5539	kfree_skb(skb);
5540	}
5541	EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
5542
5543	static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb,
5544	struct skb_shared_hwtstamps *hwtstamps,
5545	int tstype)
5546	{
5547	switch (tstype) {
5548	case SCM_TSTAMP_SCHED:
5549	return skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP;
5550	case SCM_TSTAMP_SND:
5551	return skb_shinfo(skb)->tx_flags & (hwtstamps ? SKBTX_HW_TSTAMP_NOBPF :
5552	SKBTX_SW_TSTAMP);
5553	case SCM_TSTAMP_ACK:
5554	return TCP_SKB_CB(skb)->txstamp_ack & TSTAMP_ACK_SK;
5555	case SCM_TSTAMP_COMPLETION:
5556	return skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP;
5557	}
5558
5559	return false;
5560	}
5561
5562	static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb,
5563	struct skb_shared_hwtstamps *hwtstamps,
5564	struct sock *sk,
5565	int tstype)
5566	{
5567	int op;
5568
5569	switch (tstype) {
5570	case SCM_TSTAMP_SCHED:
5571	op = BPF_SOCK_OPS_TSTAMP_SCHED_CB;
5572	break;
5573	case SCM_TSTAMP_SND:
5574	if (hwtstamps) {
5575	op = BPF_SOCK_OPS_TSTAMP_SND_HW_CB;
5576	skb_hwtstamps(skb) = hwtstamps;
5577	} else {
5578	op = BPF_SOCK_OPS_TSTAMP_SND_SW_CB;
5579	}
5580	break;
5581	case SCM_TSTAMP_ACK:
5582	op = BPF_SOCK_OPS_TSTAMP_ACK_CB;
5583	break;
5584	default:
5585	return;
5586	}
5587
5588	bpf_skops_tx_timestamping(sk, skb, op);
5589	}
5590
5591	void __skb_tstamp_tx(struct sk_buff *orig_skb,
5592	const struct sk_buff *ack_skb,
5593	struct skb_shared_hwtstamps *hwtstamps,
5594	struct sock sk, int* tstype)
5595	{
5596	struct sk_buff *skb;
5597	bool tsonly, opt_stats = false;
5598	u32 tsflags;
5599
5600	if (!sk)
5601	return;
5602
5603	if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
5604	skb_tstamp_tx_report_bpf_timestamping(skb: orig_skb, hwtstamps,
5605	sk, tstype);
5606
5607	if (!skb_tstamp_tx_report_so_timestamping(skb: orig_skb, hwtstamps, tstype))
5608	return;
5609
5610	tsflags = READ_ONCE(sk->sk_tsflags);
5611	if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
5612	skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
5613	return;
5614
5615	tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
5616	if (!skb_may_tx_timestamp(sk, tsonly))
5617	return;
5618
5619	if (tsonly) {
5620	#ifdef CONFIG_INET
5621	if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
5622	sk_is_tcp(sk)) {
5623	skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
5624	ack_skb);
5625	opt_stats = true;
5626	} else
5627	#endif
5628	skb = alloc_skb(size: `0`, GFP_ATOMIC);
5629	} else {
5630	skb = skb_clone(orig_skb, GFP_ATOMIC);
5631
5632	if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
5633	kfree_skb(skb);
5634	return;
5635	}
5636	}
5637	if (!skb)
5638	return;
5639
5640	if (tsonly) {
5641	skb_shinfo(skb)->tx_flags \|= skb_shinfo(orig_skb)->tx_flags &
5642	SKBTX_ANY_TSTAMP;
5643	skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
5644	}
5645
5646	if (hwtstamps)
5647	skb_hwtstamps(skb) = hwtstamps;
5648	else
5649	__net_timestamp(skb);
5650
5651	__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
5652	}
5653	EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
5654
5655	void skb_tstamp_tx(struct sk_buff *orig_skb,
5656	struct skb_shared_hwtstamps *hwtstamps)
5657	{
5658	return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
5659	SCM_TSTAMP_SND);
5660	}
5661	EXPORT_SYMBOL_GPL(skb_tstamp_tx);
5662
5663	#ifdef CONFIG_WIRELESS
5664	void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
5665	{
5666	struct sock *sk = skb->sk;
5667	struct sock_exterr_skb *serr;
5668	int err = `1`;
5669
5670	skb->wifi_acked_valid = `1`;
5671	skb->wifi_acked = acked;
5672
5673	serr = SKB_EXT_ERR(skb);
5674	memset(serr, `0`, sizeof(*serr));
5675	serr->ee.ee_errno = ENOMSG;
5676	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
5677
5678	/ Take a reference to prevent skb_orphan() from freeing the socket,*
5679	* but only if the socket refcount is not zero.
5680	*/
5681	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
5682	err = sock_queue_err_skb(sk, skb);
5683	sock_put(sk);
5684	}
5685	if (err)
5686	kfree_skb(skb);
5687	}
5688	EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
5689	#endif /* CONFIG_WIRELESS */
5690
5691	/**
5692	* skb_partial_csum_set - set up and verify partial csum values for packet
5693	* @skb: the skb to set
5694	* @start: the number of bytes after skb->data to start checksumming.
5695	* @off: the offset from start to place the checksum.
5696	*
5697	* For untrusted partially-checksummed packets, we need to make sure the values
5698	* for skb->csum_start and skb->csum_offset are valid so we don't oops.
5699	*
5700	* This function checks and sets those values and skb->ip_summed: if this
5701	* returns false you should drop the packet.
5702	*/
5703	bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
5704	{
5705	u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
5706	u32 csum_start = skb_headroom(skb) + (u32)start;
5707
5708	if (unlikely(csum_start >= U16_MAX \|\| csum_end > skb_headlen(skb))) {
5709	net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
5710	start, off, skb_headroom(skb), skb_headlen(skb));
5711	return false;
5712	}
5713	skb->ip_summed = CHECKSUM_PARTIAL;
5714	skb->csum_start = csum_start;
5715	skb->csum_offset = off;
5716	skb->transport_header = csum_start;
5717	return true;
5718	}
5719	EXPORT_SYMBOL_GPL(skb_partial_csum_set);
5720
5721	static int skb_maybe_pull_tail(struct sk_buff skb, unsigned* int len,
5722	unsigned int max)
5723	{
5724	if (skb_headlen(skb) >= len)
5725	return `0`;
5726
5727	/ If we need to pullup then pullup to the max, so we*
5728	* won't need to do it again.
5729	*/
5730	if (max > skb->len)
5731	max = skb->len;
5732
5733	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
5734	return -ENOMEM;
5735
5736	if (skb_headlen(skb) < len)
5737	return -EPROTO;
5738
5739	return `0`;
5740	}
5741
5742	#define MAX_TCP_HDR_LEN (15 * 4)
5743
5744	static __sum16 skb_checksum_setup_ip(struct* sk_buff *skb,
5745	typeof(IPPROTO_IP) proto,
5746	unsigned int off)
5747	{
5748	int err;
5749
5750	switch (proto) {
5751	case IPPROTO_TCP:
5752	err = skb_maybe_pull_tail(skb, len: off + sizeof(struct tcphdr),
5753	max: off + MAX_TCP_HDR_LEN);
5754	if (!err && !skb_partial_csum_set(skb, off,
5755	offsetof(struct tcphdr,
5756	check)))
5757	err = -EPROTO;
5758	return err ? ERR_PTR(error: err) : &tcp_hdr(skb)->check;
5759
5760	case IPPROTO_UDP:
5761	err = skb_maybe_pull_tail(skb, len: off + sizeof(struct udphdr),
5762	max: off + sizeof(struct udphdr));
5763	if (!err && !skb_partial_csum_set(skb, off,
5764	offsetof(struct udphdr,
5765	check)))
5766	err = -EPROTO;
5767	return err ? ERR_PTR(error: err) : &udp_hdr(skb)->check;
5768	}
5769
5770	return ERR_PTR(error: -EPROTO);
5771	}
5772
5773	/ This value should be large enough to cover a tagged ethernet header plus*
5774	* maximally sized IP and TCP or UDP headers.
5775	*/
5776	#define MAX_IP_HDR_LEN 128
5777
5778	static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
5779	{
5780	unsigned int off;
5781	bool fragment;
5782	__sum16 *csum;
5783	int err;
5784
5785	fragment = false;
5786
5787	err = skb_maybe_pull_tail(skb,
5788	len: sizeof(struct iphdr),
5789	MAX_IP_HDR_LEN);
5790	if (err < `0`)
5791	goto out;
5792
5793	if (ip_is_fragment(iph: ip_hdr(skb)))
5794	fragment = true;
5795
5796	off = ip_hdrlen(skb);
5797
5798	err = -EPROTO;
5799
5800	if (fragment)
5801	goto out;
5802
5803	csum = skb_checksum_setup_ip(skb, proto: ip_hdr(skb)->protocol, off);
5804	if (IS_ERR(ptr: csum))
5805	return PTR_ERR(ptr: csum);
5806
5807	if (recalculate)
5808	*csum = ~csum_tcpudp_magic(saddr: ip_hdr(skb)->saddr,
5809	daddr: ip_hdr(skb)->daddr,
5810	len: skb->len - off,
5811	proto: ip_hdr(skb)->protocol, sum: `0`);
5812	err = `0`;
5813
5814	out:
5815	return err;
5816	}
5817
5818	/ This value should be large enough to cover a tagged ethernet header plus*
5819	* an IPv6 header, all options, and a maximal TCP or UDP header.
5820	*/
5821	#define MAX_IPV6_HDR_LEN 256
5822
5823	#define OPT_HDR(type, skb, off) \
5824	(type *)(skb_network_header(skb) + (off))
5825
5826	static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
5827	{
5828	int err;
5829	u8 nexthdr;
5830	unsigned int off;
5831	unsigned int len;
5832	bool fragment;
5833	bool done;
5834	__sum16 *csum;
5835
5836	fragment = false;
5837	done = false;
5838
5839	off = sizeof(struct ipv6hdr);
5840
5841	err = skb_maybe_pull_tail(skb, len: off, MAX_IPV6_HDR_LEN);
5842	if (err < `0`)
5843	goto out;
5844
5845	nexthdr = ipv6_hdr(skb)->nexthdr;
5846
5847	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
5848	while (off <= len && !done) {
5849	switch (nexthdr) {
5850	case IPPROTO_DSTOPTS:
5851	case IPPROTO_HOPOPTS:
5852	case IPPROTO_ROUTING: {
5853	struct ipv6_opt_hdr *hp;
5854
5855	err = skb_maybe_pull_tail(skb,
5856	len: off +
5857	sizeof(struct ipv6_opt_hdr),
5858	MAX_IPV6_HDR_LEN);
5859	if (err < `0`)
5860	goto out;
5861
5862	hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
5863	nexthdr = hp->nexthdr;
5864	off += ipv6_optlen(hp);
5865	break;
5866	}
5867	case IPPROTO_AH: {
5868	struct ip_auth_hdr *hp;
5869
5870	err = skb_maybe_pull_tail(skb,
5871	len: off +
5872	sizeof(struct ip_auth_hdr),
5873	MAX_IPV6_HDR_LEN);
5874	if (err < `0`)
5875	goto out;
5876
5877	hp = OPT_HDR(struct ip_auth_hdr, skb, off);
5878	nexthdr = hp->nexthdr;
5879	off += ipv6_authlen(hp);
5880	break;
5881	}
5882	case IPPROTO_FRAGMENT: {
5883	struct frag_hdr *hp;
5884
5885	err = skb_maybe_pull_tail(skb,
5886	len: off +
5887	sizeof(struct frag_hdr),
5888	MAX_IPV6_HDR_LEN);
5889	if (err < `0`)
5890	goto out;
5891
5892	hp = OPT_HDR(struct frag_hdr, skb, off);
5893
5894	if (hp->frag_off & htons(IP6_OFFSET \| IP6_MF))
5895	fragment = true;
5896
5897	nexthdr = hp->nexthdr;
5898	off += sizeof(struct frag_hdr);
5899	break;
5900	}
5901	default:
5902	done = true;
5903	break;
5904	}
5905	}
5906
5907	err = -EPROTO;
5908
5909	if (!done \|\| fragment)
5910	goto out;
5911
5912	csum = skb_checksum_setup_ip(skb, proto: nexthdr, off);
5913	if (IS_ERR(ptr: csum))
5914	return PTR_ERR(ptr: csum);
5915
5916	if (recalculate)
5917	*csum = ~csum_ipv6_magic(saddr: &ipv6_hdr(skb)->saddr,
5918	daddr: &ipv6_hdr(skb)->daddr,
5919	len: skb->len - off, proto: nexthdr, csum: `0`);
5920	err = `0`;
5921
5922	out:
5923	return err;
5924	}
5925
5926	/**
5927	* skb_checksum_setup - set up partial checksum offset
5928	* @skb: the skb to set up
5929	* @recalculate: if true the pseudo-header checksum will be recalculated
5930	*/
5931	int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
5932	{
5933	int err;
5934
5935	switch (skb->protocol) {
5936	case htons(ETH_P_IP):
5937	err = skb_checksum_setup_ipv4(skb, recalculate);
5938	break;
5939
5940	case htons(ETH_P_IPV6):
5941	err = skb_checksum_setup_ipv6(skb, recalculate);
5942	break;
5943
5944	default:
5945	err = -EPROTO;
5946	break;
5947	}
5948
5949	return err;
5950	}
5951	EXPORT_SYMBOL(skb_checksum_setup);
5952
5953	/**
5954	* skb_checksum_maybe_trim - maybe trims the given skb
5955	* @skb: the skb to check
5956	* @transport_len: the data length beyond the network header
5957	*
5958	* Checks whether the given skb has data beyond the given transport length.
5959	* If so, returns a cloned skb trimmed to this transport length.
5960	* Otherwise returns the provided skb. Returns NULL in error cases
5961	* (e.g. transport_len exceeds skb length or out-of-memory).
5962	*
5963	* Caller needs to set the skb transport header and free any returned skb if it
5964	* differs from the provided skb.
5965	*/
5966	static struct sk_buff skb_checksum_maybe_trim(struct* sk_buff *skb,
5967	unsigned int transport_len)
5968	{
5969	struct sk_buff *skb_chk;
5970	unsigned int len = skb_transport_offset(skb) + transport_len;
5971	int ret;
5972
5973	if (skb->len < len)
5974	return NULL;
5975	else if (skb->len == len)
5976	return skb;
5977
5978	skb_chk = skb_clone(skb, GFP_ATOMIC);
5979	if (!skb_chk)
5980	return NULL;
5981
5982	ret = pskb_trim_rcsum(skb: skb_chk, len);
5983	if (ret) {
5984	kfree_skb(skb: skb_chk);
5985	return NULL;
5986	}
5987
5988	return skb_chk;
5989	}
5990
5991	/**
5992	* skb_checksum_trimmed - validate checksum of an skb
5993	* @skb: the skb to check
5994	* @transport_len: the data length beyond the network header
5995	* @skb_chkf: checksum function to use
5996	*
5997	* Applies the given checksum function skb_chkf to the provided skb.
5998	* Returns a checked and maybe trimmed skb. Returns NULL on error.
5999	*
6000	* If the skb has data beyond the given transport length, then a
6001	* trimmed & cloned skb is checked and returned.
6002	*
6003	* Caller needs to set the skb transport header and free any returned skb if it
6004	* differs from the provided skb.
6005	*/
6006	struct sk_buff skb_checksum_trimmed(struct* sk_buff *skb,
6007	unsigned int transport_len,
6008	__sum16(skb_chkf)(struct* sk_buff *skb))
6009	{
6010	struct sk_buff *skb_chk;
6011	unsigned int offset = skb_transport_offset(skb);
6012	__sum16 ret;
6013
6014	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
6015	if (!skb_chk)
6016	goto err;
6017
6018	if (!pskb_may_pull(skb: skb_chk, len: offset))
6019	goto err;
6020
6021	skb_pull_rcsum(skb_chk, offset);
6022	ret = skb_chkf(skb_chk);
6023	skb_push_rcsum(skb: skb_chk, len: offset);
6024
6025	if (ret)
6026	goto err;
6027
6028	return skb_chk;
6029
6030	err:
6031	if (skb_chk && skb_chk != skb)
6032	kfree_skb(skb: skb_chk);
6033
6034	return NULL;
6035
6036	}
6037	EXPORT_SYMBOL(skb_checksum_trimmed);
6038
6039	void __skb_warn_lro_forwarding(const struct sk_buff *skb)
6040	{
6041	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
6042	skb->dev->name);
6043	}
6044	EXPORT_SYMBOL(__skb_warn_lro_forwarding);
6045
6046	void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
6047	{
6048	if (head_stolen) {
6049	skb_release_head_state(skb);
6050	kmem_cache_free(s: net_hotdata.skbuff_cache, objp: skb);
6051	} else {
6052	__kfree_skb(skb);
6053	}
6054	}
6055	EXPORT_SYMBOL(kfree_skb_partial);
6056
6057	/**
6058	* skb_try_coalesce - try to merge skb to prior one
6059	* @to: prior buffer
6060	* @from: buffer to add
6061	* @fragstolen: pointer to boolean
6062	* @delta_truesize: how much more was allocated than was requested
6063	*/
6064	bool skb_try_coalesce(struct sk_buff to, struct* sk_buff *from,
6065	bool fragstolen, int* *delta_truesize)
6066	{
6067	struct skb_shared_info to_shinfo, from_shinfo;
6068	int i, delta, len = from->len;
6069
6070	*fragstolen = false;
6071
6072	if (skb_cloned(skb: to))
6073	return false;
6074
6075	/ In general, avoid mixing page_pool and non-page_pool allocated*
6076	* pages within the same SKB. In theory we could take full
6077	* references if @from is cloned and !@to->pp_recycle but its
6078	* tricky (due to potential race with the clone disappearing) and
6079	* rare, so not worth dealing with.
6080	*/
6081	if (to->pp_recycle != from->pp_recycle)
6082	return false;
6083
6084	if (skb_frags_readable(skb: from) != skb_frags_readable(skb: to))
6085	return false;
6086
6087	if (len <= skb_tailroom(skb: to) && skb_frags_readable(skb: from)) {
6088	if (len)
6089	BUG_ON(skb_copy_bits(from, `0`, skb_put(to, len), len));
6090	*delta_truesize = `0`;
6091	return true;
6092	}
6093
6094	to_shinfo = skb_shinfo(to);
6095	from_shinfo = skb_shinfo(from);
6096	if (to_shinfo->frag_list \|\| from_shinfo->frag_list)
6097	return false;
6098	if (skb_zcopy(skb: to) \|\| skb_zcopy(skb: from))
6099	return false;
6100
6101	if (skb_headlen(skb: from) != `0`) {
6102	struct page *page;
6103	unsigned int offset;
6104
6105	if (to_shinfo->nr_frags +
6106	from_shinfo->nr_frags >= MAX_SKB_FRAGS)
6107	return false;
6108
6109	if (skb_head_is_locked(skb: from))
6110	return false;
6111
6112	delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
6113
6114	page = virt_to_head_page(x: from->head);
6115	offset = from->data - (unsigned char *)page_address(page);
6116
6117	skb_fill_page_desc(skb: to, i: to_shinfo->nr_frags,
6118	page, off: offset, size: skb_headlen(skb: from));
6119	*fragstolen = true;
6120	} else {
6121	if (to_shinfo->nr_frags +
6122	from_shinfo->nr_frags > MAX_SKB_FRAGS)
6123	return false;
6124
6125	delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
6126	}
6127
6128	WARN_ON_ONCE(delta < len);
6129
6130	memcpy(to_shinfo->frags + to_shinfo->nr_frags,
6131	from_shinfo->frags,
6132	from_shinfo->nr_frags * sizeof(skb_frag_t));
6133	to_shinfo->nr_frags += from_shinfo->nr_frags;
6134
6135	if (!skb_cloned(skb: from))
6136	from_shinfo->nr_frags = `0`;
6137
6138	/ if the skb is not cloned this does nothing*
6139	* since we set nr_frags to 0.
6140	*/
6141	if (skb_pp_frag_ref(skb: from)) {
6142	for (i = `0`; i < from_shinfo->nr_frags; i++)
6143	__skb_frag_ref(frag: &from_shinfo->frags[i]);
6144	}
6145
6146	to->truesize += delta;
6147	to->len += len;
6148	to->data_len += len;
6149
6150	*delta_truesize = delta;
6151	return true;
6152	}
6153	EXPORT_SYMBOL(skb_try_coalesce);
6154
6155	/**
6156	* skb_scrub_packet - scrub an skb
6157	*
6158	* @skb: buffer to clean
6159	* @xnet: packet is crossing netns
6160	*
6161	* skb_scrub_packet can be used after encapsulating or decapsulating a packet
6162	* into/from a tunnel. Some information have to be cleared during these
6163	* operations.
6164	* skb_scrub_packet can also be used to clean a skb before injecting it in
6165	* another namespace (@xnet == true). We have to clear all information in the
6166	* skb that could impact namespace isolation.
6167	*/
6168	void skb_scrub_packet(struct sk_buff *skb, bool xnet)
6169	{
6170	skb->pkt_type = PACKET_HOST;
6171	skb->skb_iif = `0`;
6172	skb->ignore_df = `0`;
6173	skb_dst_drop(skb);
6174	skb_ext_reset(skb);
6175	nf_reset_ct(skb);
6176	nf_reset_trace(skb);
6177
6178	#ifdef CONFIG_NET_SWITCHDEV
6179	skb->offload_fwd_mark = `0`;
6180	skb->offload_l3_fwd_mark = `0`;
6181	#endif
6182	ipvs_reset(skb);
6183
6184	if (!xnet)
6185	return;
6186
6187	skb->mark = `0`;
6188	skb_clear_tstamp(skb);
6189	}
6190	EXPORT_SYMBOL_GPL(skb_scrub_packet);
6191
6192	static struct sk_buff skb_reorder_vlan_header(struct* sk_buff *skb)
6193	{
6194	int mac_len, meta_len;
6195	void *meta;
6196
6197	if (skb_cow(skb, headroom: skb_headroom(skb)) < `0`) {
6198	kfree_skb(skb);
6199	return NULL;
6200	}
6201
6202	mac_len = skb->data - skb_mac_header(skb);
6203	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
6204	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
6205	mac_len - VLAN_HLEN - ETH_TLEN);
6206	}
6207
6208	meta_len = skb_metadata_len(skb);
6209	if (meta_len) {
6210	meta = skb_metadata_end(skb) - meta_len;
6211	memmove(meta + VLAN_HLEN, meta, meta_len);
6212	}
6213
6214	skb->mac_header += VLAN_HLEN;
6215	return skb;
6216	}
6217
6218	struct sk_buff skb_vlan_untag(struct* sk_buff *skb)
6219	{
6220	struct vlan_hdr *vhdr;
6221	u16 vlan_tci;
6222
6223	if (unlikely(skb_vlan_tag_present(skb))) {
6224	/ vlan_tci is already set-up so leave this for another time /
6225	return skb;
6226	}
6227
6228	skb = skb_share_check(skb, GFP_ATOMIC);
6229	if (unlikely(!skb))
6230	goto err_free;
6231	/ We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). /
6232	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
6233	goto err_free;
6234
6235	vhdr = (struct vlan_hdr *)skb->data;
6236	vlan_tci = ntohs(vhdr->h_vlan_TCI);
6237	__vlan_hwaccel_put_tag(skb, vlan_proto: skb->protocol, vlan_tci);
6238
6239	skb_pull_rcsum(skb, VLAN_HLEN);
6240	vlan_set_encap_proto(skb, vhdr);
6241
6242	skb = skb_reorder_vlan_header(skb);
6243	if (unlikely(!skb))
6244	goto err_free;
6245
6246	skb_reset_network_header(skb);
6247	if (!skb_transport_header_was_set(skb))
6248	skb_reset_transport_header(skb);
6249	skb_reset_mac_len(skb);
6250
6251	return skb;
6252
6253	err_free:
6254	kfree_skb(skb);
6255	return NULL;
6256	}
6257	EXPORT_SYMBOL(skb_vlan_untag);
6258
6259	int skb_ensure_writable(struct sk_buff skb, unsigned* int write_len)
6260	{
6261	if (!pskb_may_pull(skb, len: write_len))
6262	return -ENOMEM;
6263
6264	if (!skb_frags_readable(skb))
6265	return -EFAULT;
6266
6267	if (!skb_cloned(skb) \|\| skb_clone_writable(skb, len: write_len))
6268	return `0`;
6269
6270	return pskb_expand_head(skb, `0`, `0`, GFP_ATOMIC);
6271	}
6272	EXPORT_SYMBOL(skb_ensure_writable);
6273
6274	int skb_ensure_writable_head_tail(struct sk_buff skb, struct* net_device *dev)
6275	{
6276	int needed_headroom = dev->needed_headroom;
6277	int needed_tailroom = dev->needed_tailroom;
6278
6279	/ For tail taggers, we need to pad short frames ourselves, to ensure*
6280	* that the tail tag does not fail at its role of being at the end of
6281	* the packet, once the conduit interface pads the frame. Account for
6282	* that pad length here, and pad later.
6283	*/
6284	if (unlikely(needed_tailroom && skb->len < ETH_ZLEN))
6285	needed_tailroom += ETH_ZLEN - skb->len;
6286	/ skb_headroom() returns unsigned int... /
6287	needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), `0`);
6288	needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), `0`);
6289
6290	if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb)))
6291	/ No reallocation needed, yay! /
6292	return `0`;
6293
6294	return pskb_expand_head(skb, needed_headroom, needed_tailroom,
6295	GFP_ATOMIC);
6296	}
6297	EXPORT_SYMBOL(skb_ensure_writable_head_tail);
6298
6299	/ remove VLAN header from packet and update csum accordingly.*
6300	* expects a non skb_vlan_tag_present skb with a vlan tag payload
6301	*/
6302	int __skb_vlan_pop(struct sk_buff skb, u16 vlan_tci)
6303	{
6304	int offset = skb->data - skb_mac_header(skb);
6305	int err;
6306
6307	if (WARN_ONCE(offset,
6308	"__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
6309	offset)) {
6310	return -EINVAL;
6311	}
6312
6313	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
6314	if (unlikely(err))
6315	return err;
6316
6317	skb_postpull_rcsum(skb, start: skb->data + (`2` * ETH_ALEN), VLAN_HLEN);
6318
6319	vlan_remove_tag(skb, vlan_tci);
6320
6321	skb->mac_header += VLAN_HLEN;
6322
6323	if (skb_network_offset(skb) < ETH_HLEN)
6324	skb_set_network_header(skb, ETH_HLEN);
6325
6326	skb_reset_mac_len(skb);
6327
6328	return err;
6329	}
6330	EXPORT_SYMBOL(__skb_vlan_pop);
6331
6332	/ Pop a vlan tag either from hwaccel or from payload.*
6333	* Expects skb->data at mac header.
6334	*/
6335	int skb_vlan_pop(struct sk_buff *skb)
6336	{
6337	u16 vlan_tci;
6338	__be16 vlan_proto;
6339	int err;
6340
6341	if (likely(skb_vlan_tag_present(skb))) {
6342	__vlan_hwaccel_clear_tag(skb);
6343	} else {
6344	if (unlikely(!eth_type_vlan(skb->protocol)))
6345	return `0`;
6346
6347	err = __skb_vlan_pop(skb, &vlan_tci);
6348	if (err)
6349	return err;
6350	}
6351	/ move next vlan tag to hw accel tag /
6352	if (likely(!eth_type_vlan(skb->protocol)))
6353	return `0`;
6354
6355	vlan_proto = skb->protocol;
6356	err = __skb_vlan_pop(skb, &vlan_tci);
6357	if (unlikely(err))
6358	return err;
6359
6360	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
6361	return `0`;
6362	}
6363	EXPORT_SYMBOL(skb_vlan_pop);
6364
6365	/ Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).*
6366	* Expects skb->data at mac header.
6367	*/
6368	int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
6369	{
6370	if (skb_vlan_tag_present(skb)) {
6371	int offset = skb->data - skb_mac_header(skb);
6372	int err;
6373
6374	if (WARN_ONCE(offset,
6375	"skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
6376	offset)) {
6377	return -EINVAL;
6378	}
6379
6380	err = __vlan_insert_tag(skb, vlan_proto: skb->vlan_proto,
6381	skb_vlan_tag_get(skb));
6382	if (err)
6383	return err;
6384
6385	skb->protocol = skb->vlan_proto;
6386	skb->network_header -= VLAN_HLEN;
6387
6388	skb_postpush_rcsum(skb, start: skb->data + (`2` * ETH_ALEN), VLAN_HLEN);
6389	}
6390	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
6391	return `0`;
6392	}
6393	EXPORT_SYMBOL(skb_vlan_push);
6394
6395	/**
6396	* skb_eth_pop() - Drop the Ethernet header at the head of a packet
6397	*
6398	* @skb: Socket buffer to modify
6399	*
6400	* Drop the Ethernet header of @skb.
6401	*
6402	* Expects that skb->data points to the mac header and that no VLAN tags are
6403	* present.
6404	*
6405	* Returns 0 on success, -errno otherwise.
6406	*/
6407	int skb_eth_pop(struct sk_buff *skb)
6408	{
6409	if (!pskb_may_pull(skb, ETH_HLEN) \|\| skb_vlan_tagged(skb) \|\|
6410	skb_network_offset(skb) < ETH_HLEN)
6411	return -EPROTO;
6412
6413	skb_pull_rcsum(skb, ETH_HLEN);
6414	skb_reset_mac_header(skb);
6415	skb_reset_mac_len(skb);
6416
6417	return `0`;
6418	}
6419	EXPORT_SYMBOL(skb_eth_pop);
6420
6421	/**
6422	* skb_eth_push() - Add a new Ethernet header at the head of a packet
6423	*
6424	* @skb: Socket buffer to modify
6425	* @dst: Destination MAC address of the new header
6426	* @src: Source MAC address of the new header
6427	*
6428	* Prepend @skb with a new Ethernet header.
6429	*
6430	* Expects that skb->data points to the mac header, which must be empty.
6431	*
6432	* Returns 0 on success, -errno otherwise.
6433	*/
6434	int skb_eth_push(struct sk_buff skb, const* unsigned char *dst,
6435	const unsigned char *src)
6436	{
6437	struct ethhdr *eth;
6438	int err;
6439
6440	if (skb_network_offset(skb) \|\| skb_vlan_tag_present(skb))
6441	return -EPROTO;
6442
6443	err = skb_cow_head(skb, headroom: sizeof(*eth));
6444	if (err < `0`)
6445	return err;
6446
6447	skb_push(skb, sizeof(*eth));
6448	skb_reset_mac_header(skb);
6449	skb_reset_mac_len(skb);
6450
6451	eth = eth_hdr(skb);
6452	ether_addr_copy(dst: eth->h_dest, src: dst);
6453	ether_addr_copy(dst: eth->h_source, src);
6454	eth->h_proto = skb->protocol;
6455
6456	skb_postpush_rcsum(skb, start: eth, len: sizeof(*eth));
6457
6458	return `0`;
6459	}
6460	EXPORT_SYMBOL(skb_eth_push);
6461
6462	/ Update the ethertype of hdr and the skb csum value if required. /
6463	static void skb_mod_eth_type(struct sk_buff skb, struct* ethhdr *hdr,
6464	__be16 ethertype)
6465	{
6466	if (skb->ip_summed == CHECKSUM_COMPLETE) {
6467	__be16 diff[] = { ~hdr->h_proto, ethertype };
6468
6469	skb->csum = csum_partial(buff: (char )diff, len: sizeof*(diff), sum: skb->csum);
6470	}
6471
6472	hdr->h_proto = ethertype;
6473	}
6474
6475	/**
6476	* skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
6477	* the packet
6478	*
6479	* @skb: buffer
6480	* @mpls_lse: MPLS label stack entry to push
6481	* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
6482	* @mac_len: length of the MAC header
6483	* @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
6484	* ethernet
6485	*
6486	* Expects skb->data at mac header.
6487	*
6488	* Returns 0 on success, -errno otherwise.
6489	*/
6490	int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
6491	int mac_len, bool ethernet)
6492	{
6493	struct mpls_shim_hdr *lse;
6494	int err;
6495
6496	if (unlikely(!eth_p_mpls(mpls_proto)))
6497	return -EINVAL;
6498
6499	/ Networking stack does not allow simultaneous Tunnel and MPLS GSO. /
6500	if (skb->encapsulation)
6501	return -EINVAL;
6502
6503	err = skb_cow_head(skb, MPLS_HLEN);
6504	if (unlikely(err))
6505	return err;
6506
6507	if (!skb->inner_protocol) {
6508	skb_set_inner_network_header(skb, offset: skb_network_offset(skb));
6509	skb_set_inner_protocol(skb, protocol: skb->protocol);
6510	}
6511
6512	skb_push(skb, MPLS_HLEN);
6513	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
6514	mac_len);
6515	skb_reset_mac_header(skb);
6516	skb_set_network_header(skb, offset: mac_len);
6517	skb_reset_mac_len(skb);
6518
6519	lse = mpls_hdr(skb);
6520	lse->label_stack_entry = mpls_lse;
6521	skb_postpush_rcsum(skb, start: lse, MPLS_HLEN);
6522
6523	if (ethernet && mac_len >= ETH_HLEN)
6524	skb_mod_eth_type(skb, hdr: eth_hdr(skb), ethertype: mpls_proto);
6525	skb->protocol = mpls_proto;
6526
6527	return `0`;
6528	}
6529	EXPORT_SYMBOL_GPL(skb_mpls_push);
6530
6531	/**
6532	* skb_mpls_pop() - pop the outermost MPLS header
6533	*
6534	* @skb: buffer
6535	* @next_proto: ethertype of header after popped MPLS header
6536	* @mac_len: length of the MAC header
6537	* @ethernet: flag to indicate if the packet is ethernet
6538	*
6539	* Expects skb->data at mac header.
6540	*
6541	* Returns 0 on success, -errno otherwise.
6542	*/
6543	int skb_mpls_pop(struct sk_buff skb, __be16 next_proto, int* mac_len,
6544	bool ethernet)
6545	{
6546	int err;
6547
6548	if (unlikely(!eth_p_mpls(skb->protocol)))
6549	return `0`;
6550
6551	err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
6552	if (unlikely(err))
6553	return err;
6554
6555	skb_postpull_rcsum(skb, start: mpls_hdr(skb), MPLS_HLEN);
6556	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
6557	mac_len);
6558
6559	__skb_pull(skb, MPLS_HLEN);
6560	skb_reset_mac_header(skb);
6561	skb_set_network_header(skb, offset: mac_len);
6562
6563	if (ethernet && mac_len >= ETH_HLEN) {
6564	struct ethhdr *hdr;
6565
6566	/ use mpls_hdr() to get ethertype to account for VLANs. /
6567	hdr = (struct ethhdr )((void* *)mpls_hdr(skb) - ETH_HLEN);
6568	skb_mod_eth_type(skb, hdr, ethertype: next_proto);
6569	}
6570	skb->protocol = next_proto;
6571
6572	return `0`;
6573	}
6574	EXPORT_SYMBOL_GPL(skb_mpls_pop);
6575
6576	/**
6577	* skb_mpls_update_lse() - modify outermost MPLS header and update csum
6578	*
6579	* @skb: buffer
6580	* @mpls_lse: new MPLS label stack entry to update to
6581	*
6582	* Expects skb->data at mac header.
6583	*
6584	* Returns 0 on success, -errno otherwise.
6585	*/
6586	int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
6587	{
6588	int err;
6589
6590	if (unlikely(!eth_p_mpls(skb->protocol)))
6591	return -EINVAL;
6592
6593	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
6594	if (unlikely(err))
6595	return err;
6596
6597	if (skb->ip_summed == CHECKSUM_COMPLETE) {
6598	__be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
6599
6600	skb->csum = csum_partial(buff: (char )diff, len: sizeof*(diff), sum: skb->csum);
6601	}
6602
6603	mpls_hdr(skb)->label_stack_entry = mpls_lse;
6604
6605	return `0`;
6606	}
6607	EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
6608
6609	/**
6610	* skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
6611	*
6612	* @skb: buffer
6613	*
6614	* Expects skb->data at mac header.
6615	*
6616	* Returns 0 on success, -errno otherwise.
6617	*/
6618	int skb_mpls_dec_ttl(struct sk_buff *skb)
6619	{
6620	u32 lse;
6621	u8 ttl;
6622
6623	if (unlikely(!eth_p_mpls(skb->protocol)))
6624	return -EINVAL;
6625
6626	if (!pskb_may_pull(skb, len: skb_network_offset(skb) + MPLS_HLEN))
6627	return -ENOMEM;
6628
6629	lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
6630	ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
6631	if (!--ttl)
6632	return -EINVAL;
6633
6634	lse &= ~MPLS_LS_TTL_MASK;
6635	lse \|= ttl << MPLS_LS_TTL_SHIFT;
6636
6637	return skb_mpls_update_lse(skb, cpu_to_be32(lse));
6638	}
6639	EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
6640
6641	/**
6642	* alloc_skb_with_frags - allocate skb with page frags
6643	*
6644	* @header_len: size of linear part
6645	* @data_len: needed length in frags
6646	* @order: max page order desired.
6647	* @errcode: pointer to error code if any
6648	* @gfp_mask: allocation mask
6649	*
6650	* This can be used to allocate a paged skb, given a maximal order for frags.
6651	*/
6652	struct sk_buff alloc_skb_with_frags(unsigned* long header_len,
6653	unsigned long data_len,
6654	int order,
6655	int *errcode,
6656	gfp_t gfp_mask)
6657	{
6658	unsigned long chunk;
6659	struct sk_buff *skb;
6660	struct page *page;
6661	int nr_frags = `0`;
6662
6663	*errcode = -EMSGSIZE;
6664	if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order)))
6665	return NULL;
6666
6667	*errcode = -ENOBUFS;
6668	skb = alloc_skb(size: header_len, priority: gfp_mask);
6669	if (!skb)
6670	return NULL;
6671
6672	while (data_len) {
6673	if (nr_frags == MAX_SKB_FRAGS - `1`)
6674	goto failure;
6675	while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order))
6676	order--;
6677
6678	if (order) {
6679	page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) \|
6680	__GFP_COMP \|
6681	__GFP_NOWARN,
6682	order);
6683	if (!page) {
6684	order--;
6685	continue;
6686	}
6687	} else {
6688	page = alloc_page(gfp_mask);
6689	if (!page)
6690	goto failure;
6691	}
6692	chunk = min_t(unsigned long, data_len,
6693	PAGE_SIZE << order);
6694	skb_fill_page_desc(skb, i: nr_frags, page, off: `0`, size: chunk);
6695	nr_frags++;
6696	skb->truesize += (PAGE_SIZE << order);
6697	data_len -= chunk;
6698	}
6699	return skb;
6700
6701	failure:
6702	kfree_skb(skb);
6703	return NULL;
6704	}
6705	EXPORT_SYMBOL(alloc_skb_with_frags);
6706
6707	/ carve out the first off bytes from skb when off < headlen /
6708	static int pskb_carve_inside_header(struct sk_buff skb, const* u32 off,
6709	const int headlen, gfp_t gfp_mask)
6710	{
6711	int i;
6712	unsigned int size = skb_end_offset(skb);
6713	int new_hlen = headlen - off;
6714	u8 *data;
6715
6716	if (skb_pfmemalloc(skb))
6717	gfp_mask \|= __GFP_MEMALLOC;
6718
6719	data = kmalloc_reserve(size: &size, flags: gfp_mask, NUMA_NO_NODE, NULL);
6720	if (!data)
6721	return -ENOMEM;
6722	size = SKB_WITH_OVERHEAD(size);
6723
6724	/ Copy real data, and all frags /
6725	skb_copy_from_linear_data_offset(skb, offset: off, to: data, len: new_hlen);
6726	skb->len -= off;
6727
6728	memcpy((struct skb_shared_info *)(data + size),
6729	skb_shinfo(skb),
6730	offsetof(struct skb_shared_info,
6731	frags[skb_shinfo(skb)->nr_frags]));
6732	if (skb_cloned(skb)) {
6733	/ drop the old head gracefully /
6734	if (skb_orphan_frags(skb, gfp_mask)) {
6735	skb_kfree_head(head: data, end_offset: size);
6736	return -ENOMEM;
6737	}
6738	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
6739	skb_frag_ref(skb, f: i);
6740	if (skb_has_frag_list(skb))
6741	skb_clone_fraglist(skb);
6742	skb_release_data(skb, reason: SKB_CONSUMED);
6743	} else {
6744	/ we can reuse existing recount- all we did was*
6745	* relocate values
6746	*/
6747	skb_free_head(skb);
6748	}
6749
6750	skb->head = data;
6751	skb->data = data;
6752	skb->head_frag = `0`;
6753	skb_set_end_offset(skb, offset: size);
6754	skb_set_tail_pointer(skb, offset: skb_headlen(skb));
6755	skb_headers_offset_update(skb, `0`);
6756	skb->cloned = `0`;
6757	skb->hdr_len = `0`;
6758	skb->nohdr = `0`;
6759	atomic_set(v: &skb_shinfo(skb)->dataref, i: `1`);
6760
6761	return `0`;
6762	}
6763
6764	static int pskb_carve(struct sk_buff skb, const* u32 off, gfp_t gfp);
6765
6766	/ carve out the first eat bytes from skb's frag_list. May recurse into*
6767	* pskb_carve()
6768	*/
6769	static int pskb_carve_frag_list(struct sk_buff *skb,
6770	struct skb_shared_info shinfo, int* eat,
6771	gfp_t gfp_mask)
6772	{
6773	struct sk_buff *list = shinfo->frag_list;
6774	struct sk_buff *clone = NULL;
6775	struct sk_buff *insp = NULL;
6776
6777	do {
6778	if (!list) {
6779	pr_err("Not enough bytes to eat. Want %d\n", eat);
6780	return -EFAULT;
6781	}
6782	if (list->len <= eat) {
6783	/ Eaten as whole. /
6784	eat -= list->len;
6785	list = list->next;
6786	insp = list;
6787	} else {
6788	/ Eaten partially. /
6789	if (skb_shared(skb: list)) {
6790	clone = skb_clone(list, gfp_mask);
6791	if (!clone)
6792	return -ENOMEM;
6793	insp = list->next;
6794	list = clone;
6795	} else {
6796	/ This may be pulled without problems. /
6797	insp = list;
6798	}
6799	if (pskb_carve(skb: list, off: eat, gfp: gfp_mask) < `0`) {
6800	kfree_skb(skb: clone);
6801	return -ENOMEM;
6802	}
6803	break;
6804	}
6805	} while (eat);
6806
6807	/ Free pulled out fragments. /
6808	while ((list = shinfo->frag_list) != insp) {
6809	shinfo->frag_list = list->next;
6810	consume_skb(list);
6811	}
6812	/ And insert new clone at head. /
6813	if (clone) {
6814	clone->next = list;
6815	shinfo->frag_list = clone;
6816	}
6817	return `0`;
6818	}
6819
6820	/ carve off first len bytes from skb. Split line (off) is in the*
6821	* non-linear part of skb
6822	*/
6823	static int pskb_carve_inside_nonlinear(struct sk_buff skb, const* u32 off,
6824	int pos, gfp_t gfp_mask)
6825	{
6826	int i, k = `0`;
6827	unsigned int size = skb_end_offset(skb);
6828	u8 *data;
6829	const int nfrags = skb_shinfo(skb)->nr_frags;
6830	struct skb_shared_info *shinfo;
6831
6832	if (skb_pfmemalloc(skb))
6833	gfp_mask \|= __GFP_MEMALLOC;
6834
6835	data = kmalloc_reserve(size: &size, flags: gfp_mask, NUMA_NO_NODE, NULL);
6836	if (!data)
6837	return -ENOMEM;
6838	size = SKB_WITH_OVERHEAD(size);
6839
6840	memcpy((struct skb_shared_info *)(data + size),
6841	skb_shinfo(skb), offsetof(struct skb_shared_info, frags[`0`]));
6842	if (skb_orphan_frags(skb, gfp_mask)) {
6843	skb_kfree_head(head: data, end_offset: size);
6844	return -ENOMEM;
6845	}
6846	shinfo = (struct skb_shared_info *)(data + size);
6847	for (i = `0`; i < nfrags; i++) {
6848	int fsize = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
6849
6850	if (pos + fsize > off) {
6851	shinfo->frags[k] = skb_shinfo(skb)->frags[i];
6852
6853	if (pos < off) {
6854	/ Split frag.*
6855	* We have two variants in this case:
6856	* 1. Move all the frag to the second
6857	* part, if it is possible. F.e.
6858	* this approach is mandatory for TUX,
6859	* where splitting is expensive.
6860	* 2. Split is accurately. We make this.
6861	*/
6862	skb_frag_off_add(frag: &shinfo->frags[`0`], delta: off - pos);
6863	skb_frag_size_sub(frag: &shinfo->frags[`0`], delta: off - pos);
6864	}
6865	skb_frag_ref(skb, f: i);
6866	k++;
6867	}
6868	pos += fsize;
6869	}
6870	shinfo->nr_frags = k;
6871	if (skb_has_frag_list(skb))
6872	skb_clone_fraglist(skb);
6873
6874	/ split line is in frag list /
6875	if (k == `0` && pskb_carve_frag_list(skb, shinfo, eat: off - pos, gfp_mask)) {
6876	/ skb_frag_unref() is not needed here as shinfo->nr_frags = 0. /
6877	if (skb_has_frag_list(skb))
6878	kfree_skb_list(skb_shinfo(skb)->frag_list);
6879	skb_kfree_head(head: data, end_offset: size);
6880	return -ENOMEM;
6881	}
6882	skb_release_data(skb, reason: SKB_CONSUMED);
6883
6884	skb->head = data;
6885	skb->head_frag = `0`;
6886	skb->data = data;
6887	skb_set_end_offset(skb, offset: size);
6888	skb_reset_tail_pointer(skb);
6889	skb_headers_offset_update(skb, `0`);
6890	skb->cloned = `0`;
6891	skb->hdr_len = `0`;
6892	skb->nohdr = `0`;
6893	skb->len -= off;
6894	skb->data_len = skb->len;
6895	atomic_set(v: &skb_shinfo(skb)->dataref, i: `1`);
6896	return `0`;
6897	}
6898
6899	/ remove len bytes from the beginning of the skb /
6900	static int pskb_carve(struct sk_buff skb, const* u32 len, gfp_t gfp)
6901	{
6902	int headlen = skb_headlen(skb);
6903
6904	if (len < headlen)
6905	return pskb_carve_inside_header(skb, off: len, headlen, gfp_mask: gfp);
6906	else
6907	return pskb_carve_inside_nonlinear(skb, off: len, pos: headlen, gfp_mask: gfp);
6908	}
6909
6910	/ Extract to_copy bytes starting at off from skb, and return this in*
6911	* a new skb
6912	*/
6913	struct sk_buff pskb_extract(struct* sk_buff skb, int* off,
6914	int to_copy, gfp_t gfp)
6915	{
6916	struct sk_buff *clone = skb_clone(skb, gfp);
6917
6918	if (!clone)
6919	return NULL;
6920
6921	if (pskb_carve(skb: clone, len: off, gfp) < `0` \|\|
6922	pskb_trim(skb: clone, len: to_copy)) {
6923	kfree_skb(skb: clone);
6924	return NULL;
6925	}
6926	return clone;
6927	}
6928	EXPORT_SYMBOL(pskb_extract);
6929
6930	/**
6931	* skb_condense - try to get rid of fragments/frag_list if possible
6932	* @skb: buffer
6933	*
6934	* Can be used to save memory before skb is added to a busy queue.
6935	* If packet has bytes in frags and enough tail room in skb->head,
6936	* pull all of them, so that we can free the frags right now and adjust
6937	* truesize.
6938	* Notes:
6939	* We do not reallocate skb->head thus can not fail.
6940	* Caller must re-evaluate skb->truesize if needed.
6941	*/
6942	void skb_condense(struct sk_buff *skb)
6943	{
6944	if (skb->data_len) {
6945	if (skb->data_len > skb->end - skb->tail \|\|
6946	skb_cloned(skb) \|\| !skb_frags_readable(skb))
6947	return;
6948
6949	/ Nice, we can free page frag(s) right now /
6950	__pskb_pull_tail(skb, skb->data_len);
6951	}
6952	/ At this point, skb->truesize might be over estimated,*
6953	* because skb had a fragment, and fragments do not tell
6954	* their truesize.
6955	* When we pulled its content into skb->head, fragment
6956	* was freed, but __pskb_pull_tail() could not possibly
6957	* adjust skb->truesize, not knowing the frag truesize.
6958	*/
6959	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
6960	}
6961	EXPORT_SYMBOL(skb_condense);
6962
6963	#ifdef CONFIG_SKB_EXTENSIONS
6964	static void skb_ext_get_ptr(struct* skb_ext ext, enum* skb_ext_id id)
6965	{
6966	return (void )ext + (ext->offset[id] SKB_EXT_ALIGN_VALUE);
6967	}
6968
6969	/**
6970	* __skb_ext_alloc - allocate a new skb extensions storage
6971	*
6972	* @flags: See kmalloc().
6973	*
6974	* Returns the newly allocated pointer. The pointer can later attached to a
6975	* skb via __skb_ext_set().
6976	* Note: caller must handle the skb_ext as an opaque data.
6977	*/
6978	struct skb_ext *__skb_ext_alloc(gfp_t flags)
6979	{
6980	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
6981
6982	if (new) {
6983	memset(new->offset, `0`, sizeof(new->offset));
6984	refcount_set(r: &new->refcnt, n: `1`);
6985	}
6986
6987	return new;
6988	}
6989
6990	static struct skb_ext skb_ext_maybe_cow(struct* skb_ext *old,
6991	unsigned int old_active)
6992	{
6993	struct skb_ext *new;
6994
6995	if (refcount_read(r: &old->refcnt) == `1`)
6996	return old;
6997
6998	new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
6999	if (!new)
7000	return NULL;
7001
7002	memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
7003	refcount_set(r: &new->refcnt, n: `1`);
7004
7005	#ifdef CONFIG_XFRM
7006	if (old_active & (`1` << SKB_EXT_SEC_PATH)) {
7007	struct sec_path *sp = skb_ext_get_ptr(ext: old, id: SKB_EXT_SEC_PATH);
7008	unsigned int i;
7009
7010	for (i = `0`; i < sp->len; i++)
7011	xfrm_state_hold(x: sp->xvec[i]);
7012	}
7013	#endif
7014	#ifdef CONFIG_MCTP_FLOWS
7015	if (old_active & (`1` << SKB_EXT_MCTP)) {
7016	struct mctp_flow *flow = skb_ext_get_ptr(ext: old, id: SKB_EXT_MCTP);
7017
7018	if (flow->key)
7019	refcount_inc(r: &flow->key->refs);
7020	}
7021	#endif
7022	__skb_ext_put(ext: old);
7023	return new;
7024	}
7025
7026	/**
7027	* __skb_ext_set - attach the specified extension storage to this skb
7028	* @skb: buffer
7029	* @id: extension id
7030	* @ext: extension storage previously allocated via __skb_ext_alloc()
7031	*
7032	* Existing extensions, if any, are cleared.
7033	*
7034	* Returns the pointer to the extension.
7035	*/
7036	void __skb_ext_set(struct* sk_buff skb, enum* skb_ext_id id,
7037	struct skb_ext *ext)
7038	{
7039	unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
7040
7041	skb_ext_put(skb);
7042	newlen = newoff + skb_ext_type_len[id];
7043	ext->chunks = newlen;
7044	ext->offset[id] = newoff;
7045	skb->extensions = ext;
7046	skb->active_extensions = `1` << id;
7047	return skb_ext_get_ptr(ext, id);
7048	}
7049
7050	/**
7051	* skb_ext_add - allocate space for given extension, COW if needed
7052	* @skb: buffer
7053	* @id: extension to allocate space for
7054	*
7055	* Allocates enough space for the given extension.
7056	* If the extension is already present, a pointer to that extension
7057	* is returned.
7058	*
7059	* If the skb was cloned, COW applies and the returned memory can be
7060	* modified without changing the extension space of clones buffers.
7061	*
7062	* Returns pointer to the extension or NULL on allocation failure.
7063	*/
7064	void skb_ext_add(struct* sk_buff skb, enum* skb_ext_id id)
7065	{
7066	struct skb_ext new, old = NULL;
7067	unsigned int newlen, newoff;
7068
7069	if (skb->active_extensions) {
7070	old = skb->extensions;
7071
7072	new = skb_ext_maybe_cow(old, old_active: skb->active_extensions);
7073	if (!new)
7074	return NULL;
7075
7076	if (__skb_ext_exist(ext: new, i: id))
7077	goto set_active;
7078
7079	newoff = new->chunks;
7080	} else {
7081	newoff = SKB_EXT_CHUNKSIZEOF(*new);
7082
7083	new = __skb_ext_alloc(GFP_ATOMIC);
7084	if (!new)
7085	return NULL;
7086	}
7087
7088	newlen = newoff + skb_ext_type_len[id];
7089	new->chunks = newlen;
7090	new->offset[id] = newoff;
7091	set_active:
7092	skb->slow_gro = `1`;
7093	skb->extensions = new;
7094	skb->active_extensions \|= `1` << id;
7095	return skb_ext_get_ptr(ext: new, id);
7096	}
7097	EXPORT_SYMBOL(skb_ext_add);
7098
7099	#ifdef CONFIG_XFRM
7100	static void skb_ext_put_sp(struct sec_path *sp)
7101	{
7102	unsigned int i;
7103
7104	for (i = `0`; i < sp->len; i++)
7105	xfrm_state_put(x: sp->xvec[i]);
7106	}
7107	#endif
7108
7109	#ifdef CONFIG_MCTP_FLOWS
7110	static void skb_ext_put_mctp(struct mctp_flow *flow)
7111	{
7112	if (flow->key)
7113	mctp_key_unref(key: flow->key);
7114	}
7115	#endif
7116
7117	void __skb_ext_del(struct sk_buff skb, enum* skb_ext_id id)
7118	{
7119	struct skb_ext *ext = skb->extensions;
7120
7121	skb->active_extensions &= ~(`1` << id);
7122	if (skb->active_extensions == `0`) {
7123	skb->extensions = NULL;
7124	__skb_ext_put(ext);
7125	#ifdef CONFIG_XFRM
7126	} else if (id == SKB_EXT_SEC_PATH &&
7127	refcount_read(r: &ext->refcnt) == `1`) {
7128	struct sec_path *sp = skb_ext_get_ptr(ext, id: SKB_EXT_SEC_PATH);
7129
7130	skb_ext_put_sp(sp);
7131	sp->len = `0`;
7132	#endif
7133	}
7134	}
7135	EXPORT_SYMBOL(__skb_ext_del);
7136
7137	void __skb_ext_put(struct skb_ext *ext)
7138	{
7139	/ If this is last clone, nothing can increment*
7140	* it after check passes. Avoids one atomic op.
7141	*/
7142	if (refcount_read(r: &ext->refcnt) == `1`)
7143	goto free_now;
7144
7145	if (!refcount_dec_and_test(r: &ext->refcnt))
7146	return;
7147	free_now:
7148	#ifdef CONFIG_XFRM
7149	if (__skb_ext_exist(ext, i: SKB_EXT_SEC_PATH))
7150	skb_ext_put_sp(sp: skb_ext_get_ptr(ext, id: SKB_EXT_SEC_PATH));
7151	#endif
7152	#ifdef CONFIG_MCTP_FLOWS
7153	if (__skb_ext_exist(ext, i: SKB_EXT_MCTP))
7154	skb_ext_put_mctp(flow: skb_ext_get_ptr(ext, id: SKB_EXT_MCTP));
7155	#endif
7156
7157	kmem_cache_free(s: skbuff_ext_cache, objp: ext);
7158	}
7159	EXPORT_SYMBOL(__skb_ext_put);
7160	#endif /* CONFIG_SKB_EXTENSIONS */
7161
7162	static void kfree_skb_napi_cache(struct sk_buff *skb)
7163	{
7164	/ if SKB is a clone, don't handle this case /
7165	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
7166	__kfree_skb(skb);
7167	return;
7168	}
7169
7170	local_bh_disable();
7171	__napi_kfree_skb(skb, reason: SKB_CONSUMED);
7172	local_bh_enable();
7173	}
7174
7175	/**
7176	* skb_attempt_defer_free - queue skb for remote freeing
7177	* @skb: buffer
7178	*
7179	* Put @skb in a per-cpu list, using the cpu which
7180	* allocated the skb/pages to reduce false sharing
7181	* and memory zone spinlock contention.
7182	*/
7183	void skb_attempt_defer_free(struct sk_buff *skb)
7184	{
7185	int cpu = skb->alloc_cpu;
7186	struct softnet_data *sd;
7187	unsigned int defer_max;
7188	bool kick;
7189
7190	if (cpu == raw_smp_processor_id() \|\|
7191	WARN_ON_ONCE(cpu >= nr_cpu_ids) \|\|
7192	!cpu_online(cpu)) {
7193	nodefer: kfree_skb_napi_cache(skb);
7194	return;
7195	}
7196
7197	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
7198	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
7199
7200	sd = &per_cpu(softnet_data, cpu);
7201	defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
7202	if (READ_ONCE(sd->defer_count) >= defer_max)
7203	goto nodefer;
7204
7205	spin_lock_bh(lock: &sd->defer_lock);
7206	/ Send an IPI every time queue reaches half capacity. /
7207	kick = sd->defer_count == (defer_max >> `1`);
7208	/ Paired with the READ_ONCE() few lines above /
7209	WRITE_ONCE(sd->defer_count, sd->defer_count + `1`);
7210
7211	skb->next = sd->defer_list;
7212	/ Paired with READ_ONCE() in skb_defer_free_flush() /
7213	WRITE_ONCE(sd->defer_list, skb);
7214	spin_unlock_bh(lock: &sd->defer_lock);
7215
7216	/ Make sure to trigger NET_RX_SOFTIRQ on the remote CPU*
7217	* if we are unlucky enough (this seems very unlikely).
7218	*/
7219	if (unlikely(kick))
7220	kick_defer_list_purge(sd, cpu);
7221	}
7222
7223	static void skb_splice_csum_page(struct sk_buff skb, struct* page *page,
7224	size_t offset, size_t len)
7225	{
7226	const char *kaddr;
7227	__wsum csum;
7228
7229	kaddr = kmap_local_page(page);
7230	csum = csum_partial(buff: kaddr + offset, len, sum: `0`);
7231	kunmap_local(kaddr);
7232	skb->csum = csum_block_add(csum: skb->csum, csum2: csum, offset: skb->len);
7233	}
7234
7235	/**
7236	* skb_splice_from_iter - Splice (or copy) pages to skbuff
7237	* @skb: The buffer to add pages to
7238	* @iter: Iterator representing the pages to be added
7239	* @maxsize: Maximum amount of pages to be added
7240	* @gfp: Allocation flags
7241	*
7242	* This is a common helper function for supporting MSG_SPLICE_PAGES. It
7243	* extracts pages from an iterator and adds them to the socket buffer if
7244	* possible, copying them to fragments if not possible (such as if they're slab
7245	* pages).
7246	*
7247	* Returns the amount of data spliced/copied or -EMSGSIZE if there's
7248	* insufficient space in the buffer to transfer anything.
7249	*/
7250	ssize_t skb_splice_from_iter(struct sk_buff skb, struct* iov_iter *iter,
7251	ssize_t maxsize, gfp_t gfp)
7252	{
7253	size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
7254	struct page pages[`8`], *ppages = pages;
7255	ssize_t spliced = `0`, ret = `0`;
7256	unsigned int i;
7257
7258	while (iter->count > `0`) {
7259	ssize_t space, nr, len;
7260	size_t off;
7261
7262	ret = -EMSGSIZE;
7263	space = frag_limit - skb_shinfo(skb)->nr_frags;
7264	if (space < `0`)
7265	break;
7266
7267	/ We might be able to coalesce without increasing nr_frags /
7268	nr = clamp_t(size_t, space, `1`, ARRAY_SIZE(pages));
7269
7270	len = iov_iter_extract_pages(i: iter, pages: &ppages, maxsize, maxpages: nr, extraction_flags: `0`, offset0: &off);
7271	if (len <= `0`) {
7272	ret = len ?: -EIO;
7273	break;
7274	}
7275
7276	i = `0`;
7277	do {
7278	struct page *page = pages[i++];
7279	size_t part = min_t(size_t, PAGE_SIZE - off, len);
7280
7281	ret = -EIO;
7282	if (WARN_ON_ONCE(!sendpage_ok(page)))
7283	goto out;
7284
7285	ret = skb_append_pagefrags(skb, page, off, part,
7286	frag_limit);
7287	if (ret < `0`) {
7288	iov_iter_revert(i: iter, bytes: len);
7289	goto out;
7290	}
7291
7292	if (skb->ip_summed == CHECKSUM_NONE)
7293	skb_splice_csum_page(skb, page, offset: off, len: part);
7294
7295	off = `0`;
7296	spliced += part;
7297	maxsize -= part;
7298	len -= part;
7299	} while (len > `0`);
7300
7301	if (maxsize <= `0`)
7302	break;
7303	}
7304
7305	out:
7306	skb_len_add(skb, delta: spliced);
7307	return spliced ?: ret;
7308	}
7309	EXPORT_SYMBOL(skb_splice_from_iter);
7310
7311	static __always_inline
7312	size_t memcpy_from_iter_csum(void *iter_from, size_t progress,
7313	size_t len, void to, void* *priv2)
7314	{
7315	__wsum *csum = priv2;
7316	__wsum next = csum_partial_copy_nocheck(src: iter_from, dst: to + progress, len);
7317
7318	csum = csum_block_add(csum: csum, csum2: next, offset: progress);
7319	return `0`;
7320	}
7321
7322	static __always_inline
7323	size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress,
7324	size_t len, void to, void* *priv2)
7325	{
7326	__wsum next, *csum = priv2;
7327
7328	next = csum_and_copy_from_user(src: iter_from, dst: to + progress, len);
7329	csum = csum_block_add(csum: csum, csum2: next, offset: progress);
7330	return next ? `0` : len;
7331	}
7332
7333	bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
7334	__wsum csum, struct* iov_iter *i)
7335	{
7336	size_t copied;
7337
7338	if (WARN_ON_ONCE(!i->data_source))
7339	return false;
7340	copied = iterate_and_advance2(iter: i, len: bytes, priv: addr, priv2: csum,
7341	ustep: copy_from_user_iter_csum,
7342	step: memcpy_from_iter_csum);
7343	if (likely(copied == bytes))
7344	return true;
7345	iov_iter_revert(i, bytes: copied);
7346	return false;
7347	}
7348	EXPORT_SYMBOL(csum_and_copy_from_iter_full);
7349
7350	void get_netmem(netmem_ref netmem)
7351	{
7352	struct net_iov *niov;
7353
7354	if (netmem_is_net_iov(netmem)) {
7355	niov = netmem_to_net_iov(netmem);
7356	if (net_is_devmem_iov(niov))
7357	net_devmem_get_net_iov(niov: netmem_to_net_iov(netmem));
7358	return;
7359	}
7360	get_page(page: netmem_to_page(netmem));
7361	}
7362	EXPORT_SYMBOL(get_netmem);
7363
7364	void put_netmem(netmem_ref netmem)
7365	{
7366	struct net_iov *niov;
7367
7368	if (netmem_is_net_iov(netmem)) {
7369	niov = netmem_to_net_iov(netmem);
7370	if (net_is_devmem_iov(niov))
7371	net_devmem_put_net_iov(niov: netmem_to_net_iov(netmem));
7372	return;
7373	}
7374
7375	put_page(page: netmem_to_page(netmem));
7376	}
7377	EXPORT_SYMBOL(put_netmem);
7378

source code of linux/net/core/skbuff.c