// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/gso.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

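/* Final L2 transmission step: grow the headroom if the device needs more,
 * loop multicast copies back to the local stack when required, honour
 * lightweight-tunnel redirects, then resolve the nexthop neighbour and
 * hand the skb to neigh_output().
 */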
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		/* Make sure idev stays alive */
		rcu_read_lock();
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			rcu_read_unlock();
			return -ENOMEM;
		}
		rcu_read_unlock();
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

	rcu_read_lock();
	nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	if (IS_ERR_OR_NULL(neigh)) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock();
	return ret;
}

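/* Slow path for GSO skbs whose segments exceed the egress MTU: segment
 * in software, then send each segment through ip6_fragment() or straight
 * to ip6_finish_output2(). The first error encountered, if any, is
 * returned once the whole segment list has been walked.
 */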
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		/* Last GSO segment can be smaller than gso_size (and MTU).
		 * Adding a fragment header would produce an "atomic fragment",
		 * which is considered harmful (RFC-8021). Avoid that.
		 */
		err = segs->len > mtu ?
			ip6_fragment(net, sk, segs, ip6_finish_output2) :
			ip6_finish_output2(net, sk, segs);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

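/* GSO skbs whose network-layer length fits the MTU (or that carry a
 * fake jumbogram header) bypass segmentation; oversized ones take the
 * slow path above.
 */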
static int ip6_finish_output_gso(struct net *net, struct sock *sk,
				 struct sk_buff *skb, unsigned int mtu)
{
	if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
	    !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	return ip6_finish_output2(net, sk, skb);
}

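/* Packets that picked up an XFRM policy after NAT are re-routed through
 * dst_output(); everything else is fragmented or transmitted depending
 * on its size and GSO state.
 */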
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb))
		return ip6_finish_output_gso(net, sk, skb, mtu);

	if (skb->len > mtu ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);

	return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
		return ret;
	}
}

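/* NF_INET_POST_ROUTING entry point for IPv6 output. Drops the packet
 * when IPv6 is administratively disabled on the egress device, and
 * skips the hook for skbs already re-routed by __ip6_finish_output().
 */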
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
	if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
		return ip6_default_np_autolabel(net);
	return inet6_test_bit(AUTOFLOWLABEL, sk);
}

/*
 * xmit an sk_buff (used by TCP and SCTP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	struct hop_jumbo_hdr *hop_jumbo;
	int hoplen = sizeof(*hop_jumbo);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		/* Make sure idev stays alive */
		rcu_read_lock();
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			rcu_read_unlock();
			return -ENOBUFS;
		}
		rcu_read_unlock();
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	if (unlikely(seg_len > IPV6_MAXPLEN)) {
		hop_jumbo = skb_push(skb, hoplen);

		hop_jumbo->nexthdr = proto;
		hop_jumbo->hdrlen = 0;
		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
		hop_jumbo->tlv_len = 4;
		hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);

		proto = IPPROTO_HOPOPTS;
		seg_len = 0;
		IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = READ_ONCE(np->hop_limit);
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				  ip6_autoflowlabel(net, sk), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

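/* Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain with a matching selector; returns 1 if at least one
 * socket consumed the skb.
 */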
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {

			if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

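/* Classify a packet destined to an NDP-proxied address: returns 1 to
 * deliver it locally (unicast neighbour discovery), -1 to drop it
 * (link-local destinations cannot be proxied), and 0 to forward it.
 */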
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

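/* The IPv6 forwarding path: validates the packet and hop limit, runs
 * proxy-NDP and XFRM policy checks, sends redirects where appropriate,
 * enforces the path MTU, and finally decrements the hop limit before
 * handing the skb to the NF_INET_FORWARD hook.
 */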
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
	    (!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without any WARRANTY that application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not end-node, so that if packet contains
	 * AH/ESP, we cannot make anything.
	 * Defragmentation also would be mistake, RA packets
	 * cannot be fragmented, because there is no warranty
	 * that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			/* It's tempting to decrease the hop limit
			 * here by 1, as we do at the end of the
			 * function too.
			 *
			 * But that would be incorrect, as proxying is
			 * not forwarding.  The ip6_input function
			 * will handle this packet locally, and it
			 * depends on the hop limit being unchanged.
			 *
			 * One example is the NDP hop limit, that
			 * always has to stay 255, but other would be
			 * similar checks around RA packets, where the
			 * user can even change the desired limit.
			 */
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = dst_rt6_info(dst);
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		rcu_read_lock();
		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		rcu_read_unlock();
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
	SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
	kfree_skb_reason(skb, reason);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

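/* Set up fast-path fragmentation for an skb that already carries a
 * frag_list: save a copy of the header chain, detach the list, insert a
 * Fragment header into the first skb and trim it to its page-resident
 * length.
 */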
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

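/* Turn the next frag_list member into a full fragment: prepend the saved
 * header chain plus a Fragment header and advance the running fragment
 * offset kept in the iterator.
 */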
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

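/* Initialise the slow-path fragmentation state: how much payload is
 * left, where the next block starts, and how much head- and tailroom
 * each fragment must reserve.
 */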
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

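/* Carve the next slow-path fragment: allocate a fresh skb, copy the
 * header chain and up to mtu bytes of payload (8-byte aligned except
 * for the final fragment), and fill in the Fragment header.
 */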
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 * Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 * Charge the memory for the fragment to any owner
	 * it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 * Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 * Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 * Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

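/* Fragment an IPv6 packet and feed each fragment to @output. Uses the
 * fast path when the skb already has a suitably shaped frag_list,
 * otherwise falls back to copying fragments via ip6_frag_next().
 */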
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	u8 tstamp_type = skb->tstamp_type;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np) {
		u32 frag_size = READ_ONCE(np->frag_size);

		if (frag_size && frag_size < mtu)
			mtu = frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, tstamp_type);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		skb_set_delivery_time(frag, tstamp, tstamp_type);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = dst_rt6_info(dst);
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : dst_rt6_info(*dst);

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
					  fl6->flowi6_l3mdev,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = dst_rt6_info(*dst);
	rcu_read_lock();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @connected: whether @sk is connected or not
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * In addition, for a connected socket, cache the dst in the socket
 * if the current cache is not valid.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

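/* Recompute the per-fragment mtu and maxfraglen for non-XFRM-tunnel
 * dsts: only the first fragment has to reserve dst->header_len.
 */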
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

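/* Snapshot per-message transmit parameters (duplicated extension
 * headers, hop limit, traffic class, mtu, timestamping state) into the
 * cork so subsequent appends and the final skb build see one consistent
 * view.
 */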
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu, frag_size;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* callers pass dst together with a reference, set it first so
	 * ip6_cork_release() can put it down even in case of an error.
	 */
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	v6_cork->dontfrag = ipc6->dontfrag;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));

	frag_size = READ_ONCE(np->frag_size);
	if (frag_size && frag_size < mtu)
		mtu = frag_size;

	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	cork->base.priority = ipc6->sockc.priority;
	sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
	if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
		cork->base.flags |= IPCORK_TS_OPT_ID;
		cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
	}
	cork->base.length = 0;
	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

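/* Core corked-transmit engine shared by ip6_append_data() and
 * ip6_make_skb(): grows the pending queue one mtu-sized skb at a time,
 * copying user data via getfrag() or attaching pages directly when
 * splice or zerocopy is in use and the device supports it.
 */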
static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	bool zc = false;
	u32 tskey = 0;
	struct rt6_info *rt = dst_rt6_info(cork->dst);
	bool paged, hold_tskey = false, extra_uref = false;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if ((flags & MSG_ZEROCOPY) && length) {
		struct msghdr *msg = from;

		if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
			if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
				return -EINVAL;

			/* Leave uarg NULL if can't zerocopy, callers should
			 * be able to handle it.
			 */
			if ((rt->dst.dev->features & NETIF_F_SG) &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
				uarg = msg->msg_ubuf;
			}
		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
			uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
						    false);
			if (!uarg)
				return -ENOBUFS;
			extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
			if (rt->dst.dev->features & NETIF_F_SG &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
			} else {
				uarg_to_msgzc(uarg)->zerocopy = 0;
				skb_zcopy_set(skb, uarg, &extra_uref);
			}
		}
	} else if ((flags & MSG_SPLICE_PAGES) && length) {
		if (inet_test_bit(HDRINCL, sk))
			return -EPERM;
		if (rt->dst.dev->features & NETIF_F_SG &&
		    getfrag == ip_generic_getfrag)
			/* We need an empty buffer to attach stuff to */
			paged = true;
		else
			flags &= ~MSG_SPLICE_PAGES;
	}

	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
		if (cork->flags & IPCORK_TS_OPT_ID) {
			tskey = cork->ts_opt_id;
		} else {
			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
			hold_tskey = true;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = fragheaderlen + transhdrlen;
				pagedlen = datalen - transhdrlen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			/* [!] NOTE: copy may be negative if pagedlen>0
			 * because then the equation may reduce to -fraggap.
			 */
			if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					    from, data + transhdrlen, offset,
					    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			} else if (flags & MSG_SPLICE_PAGES) {
				copy = 0;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					    from, skb_put(skb, copy),
					    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (flags & MSG_SPLICE_PAGES) {
			struct msghdr *msg = from;

			err = -EIO;
			if (WARN_ON_ONCE(copy > msg->msg_iter.count))
				goto error;

			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
						   sk->sk_allocation);
			if (err < 0)
				goto error;
			copy = err;
			wmem_alloc_delta += copy;
		} else if (!zc) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			skb_zcopy_downgrade_managed(skb);
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					    from,
					    page_address(pfrag->page) + pfrag->offset,
					    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	if (hold_tskey)
		atomic_dec(&sk->sk_tskey);
	return err;
}

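/* Public append API used by datagram sockets: sets up the cork on the
 * first call for a message, then delegates to __ip6_append_data()
 * against the socket write queue.
 */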
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, size_t length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		dst_hold(&rt->dst);
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt);
		if (err)
			return err;

		inet->cork.fl.u.ip6 = *fl6;
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
{
	struct dst_entry *dst = cork->base.dst;

	cork->base.dst = NULL;
	skb_dst_set(skb, dst);
}

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		struct ipv6_txoptions *opt = v6_cork->opt;

		kfree(opt->dst0opt);
		kfree(opt->dst1opt);
		kfree(opt->hopopt);
		kfree(opt->srcrt);
		kfree(opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
	}
}

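/* Collapse the pending queue into one skb with a frag_list, push the
 * queued extension headers and the IPv6 header, and move the corked dst
 * and transmit parameters onto the resulting skb.
 */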
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = dst_rt6_info(cork->base.dst);
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	final_dst = &fl6->daddr;
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, sk), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = cork->base.priority;
	skb->mark = cork->base.mark;
	if (sk_is_tcp(sk))
		skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
	else
		skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);

	ip6_cork_steal_dst(skb, cork);
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		if (sk->sk_socket->type == SOCK_RAW &&
		    !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	int err;

	rcu_read_lock();
	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	rcu_read_unlock();
	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

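/* One-shot variant of the cork/append/make sequence operating on a
 * private queue, used for sends that do not cork on the socket itself.
 * Returns the finished skb or an ERR_PTR.
 */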
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct rt6_info *rt,
			     unsigned int flags, struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}