icmp.c source code [linux/net/ipv4/icmp.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* NET3: Implementation of the ICMP protocol layer.
4	*
5	* Alan Cox, <alan@lxorguk.ukuu.org.uk>
6	*
7	* Some of the function names and the icmp unreach table for this
8	* module were derived from [icmp.c 1.0.11 06/02/93] by
9	* Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
10	* Other than that this module is a complete rewrite.
11	*
12	* Fixes:
13	* Clemens Fruhwirth : introduce global icmp rate limiting
14	* with icmp type masking ability instead
15	* of broken per type icmp timeouts.
16	* Mike Shaver : RFC1122 checks.
17	* Alan Cox : Multicast ping reply as self.
18	* Alan Cox : Fix atomicity lockup in ip_build_xmit
19	* call.
20	* Alan Cox : Added 216,128 byte paths to the MTU
21	* code.
22	* Martin Mares : RFC1812 checks.
23	* Martin Mares : Can be configured to follow redirects
24	* if acting as a router _without_ a
25	* routing protocol (RFC 1812).
26	* Martin Mares : Echo requests may be configured to
27	* be ignored (RFC 1812).
28	* Martin Mares : Limitation of ICMP error message
29	* transmit rate (RFC 1812).
30	* Martin Mares : TOS and Precedence set correctly
31	* (RFC 1812).
32	* Martin Mares : Now copying as much data from the
33	* original packet as we can without
34	* exceeding 576 bytes (RFC 1812).
35	* Willy Konynenberg : Transparent proxying support.
36	* Keith Owens : RFC1191 correction for 4.2BSD based
37	* path MTU bug.
38	* Thomas Quinot : ICMP Dest Unreach codes up to 15 are
39	* valid (RFC 1812).
40	* Andi Kleen : Check all packet lengths properly
41	* and moved all kfree_skb() up to
42	* icmp_rcv.
43	* Andi Kleen : Move the rate limit bookkeeping
44	* into the dest entry and use a token
45	* bucket filter (thanks to ANK). Make
46	* the rates sysctl configurable.
47	* Yu Tianli : Fixed two ugly bugs in icmp_send
48	* - IP option length was accounted wrongly
49	* - ICMP header length was not accounted
50	* at all.
51	* Tristan Greaves : Added sysctl option to ignore bogus
52	* broadcast responses from broken routers.
53	*
54	* To Fix:
55	*
56	* - Should use skb_pull() instead of all the manual checking.
57	* This would also greatly simplify some upper layer error handlers. --AK
58	*/
59
60	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
61
62	#include <linux/module.h>
63	#include <linux/types.h>
64	#include <linux/jiffies.h>
65	#include <linux/kernel.h>
66	#include <linux/fcntl.h>
67	#include <linux/socket.h>
68	#include <linux/in.h>
69	#include <linux/inet.h>
70	#include <linux/inetdevice.h>
71	#include <linux/netdevice.h>
72	#include <linux/string.h>
73	#include <linux/netfilter_ipv4.h>
74	#include <linux/slab.h>
75	#include <net/snmp.h>
76	#include <net/ip.h>
77	#include <net/route.h>
78	#include <net/protocol.h>
79	#include <net/icmp.h>
80	#include <net/tcp.h>
81	#include <net/udp.h>
82	#include <net/raw.h>
83	#include <net/ping.h>
84	#include <linux/skbuff.h>
85	#include <net/sock.h>
86	#include <linux/errno.h>
87	#include <linux/timer.h>
88	#include <linux/init.h>
89	#include <linux/uaccess.h>
90	#include <net/checksum.h>
91	#include <net/xfrm.h>
92	#include <net/inet_common.h>
93	#include <net/ip_fib.h>
94	#include <net/l3mdev.h>
95	#include <net/addrconf.h>
96	#include <net/inet_dscp.h>
97	#define CREATE_TRACE_POINTS
98	#include <trace/events/icmp.h>
99
100	/*
101	* Build xmit assembly blocks
102	*/
103
104	struct icmp_bxm {
105	struct sk_buff *skb;
106	int offset;
107	int data_len;
108
109	struct {
110	struct icmphdr icmph;
111	__be32 times[`3`];
112	} data;
113	int head_len;
114	struct ip_options_data replyopts;
115	};
116
/* An array of errno for error messages from dest unreach. */
/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */

/*
 * One entry per destination-unreachable code, in the order named by the
 * per-entry comments. @errno is the error reported for that code and
 * @fatal marks the codes that are not treated as transient.
 */
const struct icmp_err icmp_err_convert[] = {
	{
		.errno = ENETUNREACH,	/* ICMP_NET_UNREACH */
		.fatal = 0,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_HOST_UNREACH */
		.fatal = 0,
	},
	{
		.errno = ENOPROTOOPT,	/* ICMP_PROT_UNREACH */
		.fatal = 1,
	},
	{
		.errno = ECONNREFUSED,	/* ICMP_PORT_UNREACH */
		.fatal = 1,
	},
	{
		.errno = EMSGSIZE,	/* ICMP_FRAG_NEEDED */
		.fatal = 0,
	},
	{
		.errno = EOPNOTSUPP,	/* ICMP_SR_FAILED */
		.fatal = 0,
	},
	{
		.errno = ENETUNREACH,	/* ICMP_NET_UNKNOWN */
		.fatal = 1,
	},
	{
		.errno = EHOSTDOWN,	/* ICMP_HOST_UNKNOWN */
		.fatal = 1,
	},
	{
		.errno = ENONET,	/* ICMP_HOST_ISOLATED */
		.fatal = 1,
	},
	{
		.errno = ENETUNREACH,	/* ICMP_NET_ANO	*/
		.fatal = 1,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_HOST_ANO */
		.fatal = 1,
	},
	{
		.errno = ENETUNREACH,	/* ICMP_NET_UNR_TOS */
		.fatal = 0,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_HOST_UNR_TOS */
		.fatal = 0,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_PKT_FILTERED */
		.fatal = 1,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_PREC_VIOLATION */
		.fatal = 1,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_PREC_CUTOFF */
		.fatal = 1,
	},
};
EXPORT_SYMBOL(icmp_err_convert);
187
188	/*
189	* ICMP control array. This specifies what to do with each ICMP.
190	*/
191
/*
 * Per-type dispatch entry: the receive handler for an ICMP type and
 * whether that type counts as an error message.
 */
struct icmp_control {
	enum skb_drop_reason (*handler)(struct sk_buff *skb);
	short   error;		/* This ICMP is classed as an error message */
};
196
197	static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+`1`];
198
199	static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
200
201	/ Called with BH disabled /
202	static inline struct sock icmp_xmit_lock(struct* net *net)
203	{
204	struct sock *sk;
205
206	sk = this_cpu_read(ipv4_icmp_sk);
207
208	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
209	/ This can happen if the output path signals a*
210	* dst_link_failure() for an outgoing ICMP packet.
211	*/
212	return NULL;
213	}
214	sock_net_set(sk, net);
215	return sk;
216	}
217
218	static inline void icmp_xmit_unlock(struct sock *sk)
219	{
220	sock_net_set(sk, net: &init_net);
221	spin_unlock(lock: &sk->sk_lock.slock);
222	}
223
224	/**
225	* icmp_global_allow - Are we allowed to send one more ICMP message ?
226	* @net: network namespace
227	*
228	* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
229	* Returns false if we reached the limit and can not send another packet.
230	* Works in tandem with icmp_global_consume().
231	*/
232	bool icmp_global_allow(struct net *net)
233	{
234	u32 delta, now, oldstamp;
235	int incr, new, old;
236
237	/ Note: many cpus could find this condition true.*
238	* Then later icmp_global_consume() could consume more credits,
239	* this is an acceptable race.
240	*/
241	if (atomic_read(v: &net->ipv4.icmp_global_credit) > `0`)
242	return true;
243
244	now = jiffies;
245	oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
246	delta = min_t(u32, now - oldstamp, HZ);
247	if (delta < HZ / `50`)
248	return false;
249
250	incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
251	if (!incr)
252	return false;
253
254	if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
255	old = atomic_read(v: &net->ipv4.icmp_global_credit);
256	do {
257	new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
258	} while (!atomic_try_cmpxchg(v: &net->ipv4.icmp_global_credit, old: &old, new));
259	}
260	return true;
261	}
262	EXPORT_SYMBOL(icmp_global_allow);
263
264	void icmp_global_consume(struct net *net)
265	{
266	int credits = get_random_u32_below(ceil: `3`);
267
268	/ Note: this might make icmp_global.credit negative. /
269	if (credits)
270	atomic_sub(i: credits, v: &net->ipv4.icmp_global_credit);
271	}
272	EXPORT_SYMBOL(icmp_global_consume);
273
274	static bool icmpv4_mask_allow(struct net net, int* type, int code)
275	{
276	if (type > NR_ICMP_TYPES)
277	return true;
278
279	/ Don't limit PMTU discovery. /
280	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
281	return true;
282
283	/ Limit if icmp type is enabled in ratemask. /
284	if (!((`1` << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
285	return true;
286
287	return false;
288	}
289
290	static bool icmpv4_global_allow(struct net net, int* type, int code,
291	bool *apply_ratelimit)
292	{
293	if (icmpv4_mask_allow(net, type, code))
294	return true;
295
296	if (icmp_global_allow(net)) {
297	*apply_ratelimit = true;
298	return true;
299	}
300	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
301	return false;
302	}
303
304	/*
305	* Send an ICMP frame.
306	*/
307
308	static bool icmpv4_xrlim_allow(struct net net, struct* rtable *rt,
309	struct flowi4 fl4, int* type, int code,
310	bool apply_ratelimit)
311	{
312	struct dst_entry *dst = &rt->dst;
313	struct inet_peer *peer;
314	bool rc = true;
315
316	if (!apply_ratelimit)
317	return true;
318
319	/ No rate limit on loopback /
320	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
321	goto out;
322
323	rcu_read_lock();
324	peer = inet_getpeer_v4(base: net->ipv4.peers, v4daddr: fl4->daddr,
325	vif: l3mdev_master_ifindex_rcu(dev: dst->dev));
326	rc = inet_peer_xrlim_allow(peer,
327	READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
328	rcu_read_unlock();
329	out:
330	if (!rc)
331	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
332	else
333	icmp_global_consume(net);
334	return rc;
335	}
336
337	/*
338	* Maintain the counters used in the SNMP statistics for outgoing ICMP
339	*/
340	void icmp_out_count(struct net net, unsigned* char type)
341	{
342	ICMPMSGOUT_INC_STATS(net, type);
343	ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
344	}
345
346	/*
347	* Checksum each fragment, and on the first include the headers and final
348	* checksum.
349	*/
350	static int icmp_glue_bits(void from, char* to, int* offset, int len, int odd,
351	struct sk_buff *skb)
352	{
353	struct icmp_bxm *icmp_param = from;
354	__wsum csum;
355
356	csum = skb_copy_and_csum_bits(skb: icmp_param->skb,
357	offset: icmp_param->offset + offset,
358	to, len);
359
360	skb->csum = csum_block_add(csum: skb->csum, csum2: csum, offset: odd);
361	if (icmp_pointers[icmp_param->data.icmph.type].error)
362	nf_ct_attach(skb, icmp_param->skb);
363	return `0`;
364	}
365
366	static void icmp_push_reply(struct sock *sk,
367	struct icmp_bxm *icmp_param,
368	struct flowi4 *fl4,
369	struct ipcm_cookie ipc, struct* rtable **rt)
370	{
371	struct sk_buff *skb;
372
373	if (ip_append_data(sk, fl4, getfrag: icmp_glue_bits, from: icmp_param,
374	len: icmp_param->data_len+icmp_param->head_len,
375	protolen: icmp_param->head_len,
376	ipc, rt, MSG_DONTWAIT) < `0`) {
377	__ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
378	ip_flush_pending_frames(sk);
379	} else if ((skb = skb_peek(list_: &sk->sk_write_queue)) != NULL) {
380	struct icmphdr *icmph = icmp_hdr(skb);
381	__wsum csum;
382	struct sk_buff *skb1;
383
384	csum = csum_partial_copy_nocheck(src: (void *)&icmp_param->data,
385	dst: (char *)icmph,
386	len: icmp_param->head_len);
387	skb_queue_walk(&sk->sk_write_queue, skb1) {
388	csum = csum_add(csum, addend: skb1->csum);
389	}
390	icmph->checksum = csum_fold(csum);
391	skb->ip_summed = CHECKSUM_NONE;
392	ip_push_pending_frames(sk, fl4);
393	}
394	}
395
396	/*
397	* Driving logic for building and sending ICMP messages.
398	*/
399
400	static void icmp_reply(struct icmp_bxm icmp_param, struct* sk_buff *skb)
401	{
402	struct rtable *rt = skb_rtable(skb);
403	struct net *net = dev_net_rcu(dev: rt->dst.dev);
404	bool apply_ratelimit = false;
405	struct ipcm_cookie ipc;
406	struct flowi4 fl4;
407	struct sock *sk;
408	__be32 daddr, saddr;
409	u32 mark = IP4_REPLY_MARK(net, skb->mark);
410	int type = icmp_param->data.icmph.type;
411	int code = icmp_param->data.icmph.code;
412
413	if (ip_options_echo(net, dopt: &icmp_param->replyopts.opt.opt, skb))
414	return;
415
416	/ Needed by both icmpv4_global_allow and icmp_xmit_lock /
417	local_bh_disable();
418
419	/ is global icmp_msgs_per_sec exhausted ? /
420	if (!icmpv4_global_allow(net, type, code, apply_ratelimit: &apply_ratelimit))
421	goto out_bh_enable;
422
423	sk = icmp_xmit_lock(net);
424	if (!sk)
425	goto out_bh_enable;
426
427	icmp_param->data.icmph.checksum = `0`;
428
429	ipcm_init(ipcm: &ipc);
430	ipc.tos = ip_hdr(skb)->tos;
431	ipc.sockc.mark = mark;
432	daddr = ipc.addr = ip_hdr(skb)->saddr;
433	saddr = fib_compute_spec_dst(skb);
434
435	if (icmp_param->replyopts.opt.opt.optlen) {
436	ipc.opt = &icmp_param->replyopts.opt;
437	if (ipc.opt->opt.srr)
438	daddr = icmp_param->replyopts.opt.opt.faddr;
439	}
440	memset(&fl4, `0`, sizeof(fl4));
441	fl4.daddr = daddr;
442	fl4.saddr = saddr;
443	fl4.flowi4_mark = mark;
444	fl4.flowi4_uid = sock_net_uid(net, NULL);
445	fl4.flowi4_tos = inet_dscp_to_dsfield(dscp: ip4h_dscp(ip4h: ip_hdr(skb)));
446	fl4.flowi4_proto = IPPROTO_ICMP;
447	fl4.flowi4_oif = l3mdev_master_ifindex(dev: skb->dev);
448	security_skb_classify_flow(skb, flic: flowi4_to_flowi_common(fl4: &fl4));
449	rt = ip_route_output_key(net, flp: &fl4);
450	if (IS_ERR(ptr: rt))
451	goto out_unlock;
452	if (icmpv4_xrlim_allow(net, rt, fl4: &fl4, type, code, apply_ratelimit))
453	icmp_push_reply(sk, icmp_param, fl4: &fl4, ipc: &ipc, rt: &rt);
454	ip_rt_put(rt);
455	out_unlock:
456	icmp_xmit_unlock(sk);
457	out_bh_enable:
458	local_bh_enable();
459	}
460
461	/*
462	* The device used for looking up which routing table to use for sending an ICMP
463	* error is preferably the source whenever it is set, which should ensure the
464	* icmp error can be sent to the source host, else lookup using the routing
465	* table of the destination device, else use the main routing table (index 0).
466	*/
467	static struct net_device icmp_get_route_lookup_dev(struct* sk_buff *skb)
468	{
469	struct net_device *route_lookup_dev = NULL;
470
471	if (skb->dev)
472	route_lookup_dev = skb->dev;
473	else if (skb_dst(skb))
474	route_lookup_dev = skb_dst(skb)->dev;
475	return route_lookup_dev;
476	}
477
478	static struct rtable icmp_route_lookup(struct* net net, struct* flowi4 *fl4,
479	struct sk_buff *skb_in,
480	const struct iphdr *iph, __be32 saddr,
481	dscp_t dscp, u32 mark, int type,
482	int code, struct icmp_bxm *param)
483	{
484	struct net_device *route_lookup_dev;
485	struct dst_entry dst, dst2;
486	struct rtable rt, rt2;
487	struct flowi4 fl4_dec;
488	int err;
489
490	memset(fl4, `0`, sizeof(*fl4));
491	fl4->daddr = (param->replyopts.opt.opt.srr ?
492	param->replyopts.opt.opt.faddr : iph->saddr);
493	fl4->saddr = saddr;
494	fl4->flowi4_mark = mark;
495	fl4->flowi4_uid = sock_net_uid(net, NULL);
496	fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
497	fl4->flowi4_proto = IPPROTO_ICMP;
498	fl4->fl4_icmp_type = type;
499	fl4->fl4_icmp_code = code;
500	route_lookup_dev = icmp_get_route_lookup_dev(skb: skb_in);
501	fl4->flowi4_oif = l3mdev_master_ifindex(dev: route_lookup_dev);
502
503	security_skb_classify_flow(skb: skb_in, flic: flowi4_to_flowi_common(fl4));
504	rt = ip_route_output_key_hash(net, flp: fl4, skb: skb_in);
505	if (IS_ERR(ptr: rt))
506	return rt;
507
508	/ No need to clone since we're just using its address. /
509	rt2 = rt;
510
511	dst = xfrm_lookup(net, dst_orig: &rt->dst,
512	fl: flowi4_to_flowi(fl4), NULL, flags: `0`);
513	rt = dst_rtable(dst);
514	if (!IS_ERR(ptr: dst)) {
515	if (rt != rt2)
516	return rt;
517	if (inet_addr_type_dev_table(net, dev: route_lookup_dev,
518	addr: fl4->daddr) == RTN_LOCAL)
519	return rt;
520	} else if (PTR_ERR(ptr: dst) == -EPERM) {
521	rt = NULL;
522	} else {
523	return rt;
524	}
525	err = xfrm_decode_session_reverse(net, skb: skb_in, fl: flowi4_to_flowi(fl4: &fl4_dec), AF_INET);
526	if (err)
527	goto relookup_failed;
528
529	if (inet_addr_type_dev_table(net, dev: route_lookup_dev,
530	addr: fl4_dec.saddr) == RTN_LOCAL) {
531	rt2 = __ip_route_output_key(net, flp: &fl4_dec);
532	if (IS_ERR(ptr: rt2))
533	err = PTR_ERR(ptr: rt2);
534	} else {
535	struct flowi4 fl4_2 = {};
536	unsigned long orefdst;
537
538	fl4_2.daddr = fl4_dec.saddr;
539	rt2 = ip_route_output_key(net, flp: &fl4_2);
540	if (IS_ERR(ptr: rt2)) {
541	err = PTR_ERR(ptr: rt2);
542	goto relookup_failed;
543	}
544	/ Ugh! /
545	orefdst = skb_in->_skb_refdst; / save old refdst /
546	skb_dst_set(skb: skb_in, NULL);
547	err = ip_route_input(skb: skb_in, dst: fl4_dec.daddr, src: fl4_dec.saddr,
548	dscp, devin: rt2->dst.dev) ? -EINVAL : `0`;
549
550	dst_release(dst: &rt2->dst);
551	rt2 = skb_rtable(skb: skb_in);
552	skb_in->_skb_refdst = orefdst; / restore old refdst /
553	}
554
555	if (err)
556	goto relookup_failed;
557
558	dst2 = xfrm_lookup(net, dst_orig: &rt2->dst, fl: flowi4_to_flowi(fl4: &fl4_dec), NULL,
559	flags: XFRM_LOOKUP_ICMP);
560	rt2 = dst_rtable(dst2);
561	if (!IS_ERR(ptr: dst2)) {
562	dst_release(dst: &rt->dst);
563	memcpy(fl4, &fl4_dec, sizeof(*fl4));
564	rt = rt2;
565	} else if (PTR_ERR(ptr: dst2) == -EPERM) {
566	if (rt)
567	dst_release(dst: &rt->dst);
568	return rt2;
569	} else {
570	err = PTR_ERR(ptr: dst2);
571	goto relookup_failed;
572	}
573	return rt;
574
575	relookup_failed:
576	if (rt)
577	return rt;
578	return ERR_PTR(error: err);
579	}
580
581	/*
582	* Send an ICMP message in response to a situation
583	*
584	* RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header.
585	* MAY send more (we do).
586	* MUST NOT change this header information.
587	* MUST NOT reply to a multicast/broadcast IP address.
588	* MUST NOT reply to a multicast/broadcast MAC address.
589	* MUST reply to only the first fragment.
590	*/
591
592	void __icmp_send(struct sk_buff skb_in, int* type, int code, __be32 info,
593	const struct ip_options *opt)
594	{
595	struct iphdr *iph;
596	int room;
597	struct icmp_bxm icmp_param;
598	struct rtable *rt = skb_rtable(skb: skb_in);
599	bool apply_ratelimit = false;
600	struct ipcm_cookie ipc;
601	struct flowi4 fl4;
602	__be32 saddr;
603	u8 tos;
604	u32 mark;
605	struct net *net;
606	struct sock *sk;
607
608	if (!rt)
609	return;
610
611	rcu_read_lock();
612
613	if (rt->dst.dev)
614	net = dev_net_rcu(dev: rt->dst.dev);
615	else if (skb_in->dev)
616	net = dev_net_rcu(dev: skb_in->dev);
617	else
618	goto out;
619
620	/*
621	* Find the original header. It is expected to be valid, of course.
622	* Check this, icmp_send is called from the most obscure devices
623	* sometimes.
624	*/
625	iph = ip_hdr(skb: skb_in);
626
627	if ((u8 *)iph < skb_in->head \|\|
628	(skb_network_header(skb: skb_in) + sizeof(*iph)) >
629	skb_tail_pointer(skb: skb_in))
630	goto out;
631
632	/*
633	* No replies to physical multicast/broadcast
634	*/
635	if (skb_in->pkt_type != PACKET_HOST)
636	goto out;
637
638	/*
639	* Now check at the protocol level
640	*/
641	if (rt->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST))
642	goto out;
643
644	/*
645	* Only reply to fragment 0. We byte re-order the constant
646	* mask for efficiency.
647	*/
648	if (iph->frag_off & htons(IP_OFFSET))
649	goto out;
650
651	/*
652	* If we send an ICMP error to an ICMP error a mess would result..
653	*/
654	if (icmp_pointers[type].error) {
655	/*
656	* We are an error, check if we are replying to an
657	* ICMP error
658	*/
659	if (iph->protocol == IPPROTO_ICMP) {
660	u8 _inner_type, *itp;
661
662	itp = skb_header_pointer(skb: skb_in,
663	offset: skb_network_header(skb: skb_in) +
664	(iph->ihl << `2`) +
665	offsetof(struct icmphdr,
666	type) -
667	skb_in->data,
668	len: sizeof(_inner_type),
669	buffer: &_inner_type);
670	if (!itp)
671	goto out;
672
673	/*
674	* Assume any unknown ICMP type is an error. This
675	* isn't specified by the RFC, but think about it..
676	*/
677	if (*itp > NR_ICMP_TYPES \|\|
678	icmp_pointers[*itp].error)
679	goto out;
680	}
681	}
682
683	/ Needed by both icmpv4_global_allow and icmp_xmit_lock /
684	local_bh_disable();
685
686	/ Check global sysctl_icmp_msgs_per_sec ratelimit, unless*
687	* incoming dev is loopback. If outgoing dev change to not be
688	* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
689	*/
690	if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
691	!icmpv4_global_allow(net, type, code, apply_ratelimit: &apply_ratelimit))
692	goto out_bh_enable;
693
694	sk = icmp_xmit_lock(net);
695	if (!sk)
696	goto out_bh_enable;
697
698	/*
699	* Construct source address and options.
700	*/
701
702	saddr = iph->daddr;
703	if (!(rt->rt_flags & RTCF_LOCAL)) {
704	struct net_device *dev = NULL;
705
706	rcu_read_lock();
707	if (rt_is_input_route(rt) &&
708	READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
709	dev = dev_get_by_index_rcu(net, ifindex: inet_iif(skb: skb_in));
710
711	if (dev)
712	saddr = inet_select_addr(dev, dst: iph->saddr,
713	scope: RT_SCOPE_LINK);
714	else
715	saddr = `0`;
716	rcu_read_unlock();
717	}
718
719	tos = icmp_pointers[type].error ? (RT_TOS(iph->tos) \|
720	IPTOS_PREC_INTERNETCONTROL) :
721	iph->tos;
722	mark = IP4_REPLY_MARK(net, skb_in->mark);
723
724	if (__ip_options_echo(net, dopt: &icmp_param.replyopts.opt.opt, skb: skb_in, sopt: opt))
725	goto out_unlock;
726
727
728	/*
729	* Prepare data for ICMP header.
730	*/
731
732	icmp_param.data.icmph.type = type;
733	icmp_param.data.icmph.code = code;
734	icmp_param.data.icmph.un.gateway = info;
735	icmp_param.data.icmph.checksum = `0`;
736	icmp_param.skb = skb_in;
737	icmp_param.offset = skb_network_offset(skb: skb_in);
738	ipcm_init(ipcm: &ipc);
739	ipc.tos = tos;
740	ipc.addr = iph->saddr;
741	ipc.opt = &icmp_param.replyopts.opt;
742	ipc.sockc.mark = mark;
743
744	rt = icmp_route_lookup(net, fl4: &fl4, skb_in, iph, saddr,
745	dscp: inet_dsfield_to_dscp(dsfield: tos), mark, type, code,
746	param: &icmp_param);
747	if (IS_ERR(ptr: rt))
748	goto out_unlock;
749
750	/ peer icmp_ratelimit /
751	if (!icmpv4_xrlim_allow(net, rt, fl4: &fl4, type, code, apply_ratelimit))
752	goto ende;
753
754	/ RFC says return as much as we can without exceeding 576 bytes. /
755
756	room = dst_mtu(dst: &rt->dst);
757	if (room > `576`)
758	room = `576`;
759	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
760	room -= sizeof(struct icmphdr);
761	/ Guard against tiny mtu. We need to include at least one*
762	* IP network header for this message to make any sense.
763	*/
764	if (room <= (int)sizeof(struct iphdr))
765	goto ende;
766
767	icmp_param.data_len = skb_in->len - icmp_param.offset;
768	if (icmp_param.data_len > room)
769	icmp_param.data_len = room;
770	icmp_param.head_len = sizeof(struct icmphdr);
771
772	/ if we don't have a source address at this point, fall back to the*
773	* dummy address instead of sending out a packet with a source address
774	* of 0.0.0.0
775	*/
776	if (!fl4.saddr)
777	fl4.saddr = htonl(INADDR_DUMMY);
778
779	trace_icmp_send(skb: skb_in, type, code);
780
781	icmp_push_reply(sk, icmp_param: &icmp_param, fl4: &fl4, ipc: &ipc, rt: &rt);
782	ende:
783	ip_rt_put(rt);
784	out_unlock:
785	icmp_xmit_unlock(sk);
786	out_bh_enable:
787	local_bh_enable();
788	out:
789	rcu_read_unlock();
790	}
791	EXPORT_SYMBOL(__icmp_send);
792
793	#if IS_ENABLED(CONFIG_NF_NAT)
794	#include <net/netfilter/nf_conntrack.h>
795	void icmp_ndo_send(struct sk_buff skb_in, int* type, int code, __be32 info)
796	{
797	struct sk_buff *cloned_skb = NULL;
798	struct ip_options opts = { `0` };
799	enum ip_conntrack_info ctinfo;
800	struct nf_conn *ct;
801	__be32 orig_ip;
802
803	ct = nf_ct_get(skb: skb_in, ctinfo: &ctinfo);
804	if (!ct \|\| !(ct->status & IPS_SRC_NAT)) {
805	__icmp_send(skb_in, type, code, info, &opts);
806	return;
807	}
808
809	if (skb_shared(skb: skb_in))
810	skb_in = cloned_skb = skb_clone(skb: skb_in, GFP_ATOMIC);
811
812	if (unlikely(!skb_in \|\| skb_network_header(skb_in) < skb_in->head \|\|
813	(skb_network_header(skb_in) + sizeof(struct iphdr)) >
814	skb_tail_pointer(skb_in) \|\| skb_ensure_writable(skb_in,
815	skb_network_offset(skb_in) + sizeof(struct iphdr))))
816	goto out;
817
818	orig_ip = ip_hdr(skb: skb_in)->saddr;
819	ip_hdr(skb: skb_in)->saddr = ct->tuplehash[`0`].tuple.src.u3.ip;
820	__icmp_send(skb_in, type, code, info, &opts);
821	ip_hdr(skb: skb_in)->saddr = orig_ip;
822	out:
823	consume_skb(skb: cloned_skb);
824	}
825	EXPORT_SYMBOL(icmp_ndo_send);
826	#endif
827
828	static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
829	{
830	const struct iphdr iph = (const* struct iphdr *)skb->data;
831	const struct net_protocol *ipprot;
832	int protocol = iph->protocol;
833
834	/ Checkin full IP header plus 8 bytes of protocol to*
835	* avoid additional coding at protocol handlers.
836	*/
837	if (!pskb_may_pull(skb, len: iph->ihl * `4` + `8`)) {
838	__ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
839	return;
840	}
841
842	raw_icmp_error(skb, protocol, info);
843
844	ipprot = rcu_dereference(inet_protos[protocol]);
845	if (ipprot && ipprot->err_handler)
846	ipprot->err_handler(skb, info);
847	}
848
849	static bool icmp_tag_validation(int proto)
850	{
851	bool ok;
852
853	rcu_read_lock();
854	ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation;
855	rcu_read_unlock();
856	return ok;
857	}
858
859	/*
860	* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
861	* ICMP_PARAMETERPROB.
862	*/
863
864	static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
865	{
866	enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
867	const struct iphdr *iph;
868	struct icmphdr *icmph;
869	struct net *net;
870	u32 info = `0`;
871
872	net = dev_net_rcu(dev: skb_dst(skb)->dev);
873
874	/*
875	* Incomplete header ?
876	* Only checks for the IP header, there should be an
877	* additional check for longer headers in upper levels.
878	*/
879
880	if (!pskb_may_pull(skb, len: sizeof(struct iphdr)))
881	goto out_err;
882
883	icmph = icmp_hdr(skb);
884	iph = (const struct iphdr *)skb->data;
885
886	if (iph->ihl < `5`) { / Mangled header, drop. /
887	reason = SKB_DROP_REASON_IP_INHDR;
888	goto out_err;
889	}
890
891	switch (icmph->type) {
892	case ICMP_DEST_UNREACH:
893	switch (icmph->code & `15`) {
894	case ICMP_NET_UNREACH:
895	case ICMP_HOST_UNREACH:
896	case ICMP_PROT_UNREACH:
897	case ICMP_PORT_UNREACH:
898	break;
899	case ICMP_FRAG_NEEDED:
900	/ for documentation of the ip_no_pmtu_disc*
901	* values please see
902	* Documentation/networking/ip-sysctl.rst
903	*/
904	switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
905	default:
906	net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
907	&iph->daddr);
908	break;
909	case `2`:
910	goto out;
911	case `3`:
912	if (!icmp_tag_validation(proto: iph->protocol))
913	goto out;
914	fallthrough;
915	case `0`:
916	info = ntohs(icmph->un.frag.mtu);
917	}
918	break;
919	case ICMP_SR_FAILED:
920	net_dbg_ratelimited("%pI4: Source Route Failed\n",
921	&iph->daddr);
922	break;
923	default:
924	break;
925	}
926	if (icmph->code > NR_ICMP_UNREACH)
927	goto out;
928	break;
929	case ICMP_PARAMETERPROB:
930	info = ntohl(icmph->un.gateway) >> `24`;
931	break;
932	case ICMP_TIME_EXCEEDED:
933	__ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
934	if (icmph->code == ICMP_EXC_FRAGTIME)
935	goto out;
936	break;
937	}
938
939	/*
940	* Throw it at our lower layers
941	*
942	* RFC 1122: 3.2.2 MUST extract the protocol ID from the passed
943	* header.
944	* RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the
945	* transport layer.
946	* RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to
947	* transport layer.
948	*/
949
950	/*
951	* Check the other end isn't violating RFC 1122. Some routers send
952	* bogus responses to broadcast frames. If you see this message
953	* first check your netmask matches at both ends, if it does then
954	* get the other vendor to fix their kit.
955	*/
956
957	if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
958	inet_addr_type_dev_table(net, dev: skb->dev, addr: iph->daddr) == RTN_BROADCAST) {
959	net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
960	&ip_hdr(skb)->saddr,
961	icmph->type, icmph->code,
962	&iph->daddr, skb->dev->name);
963	goto out;
964	}
965
966	icmp_socket_deliver(skb, info);
967
968	out:
969	return reason;
970	out_err:
971	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
972	return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
973	}
974
975
976	/*
977	* Handle ICMP_REDIRECT.
978	*/
979
980	static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
981	{
982	if (skb->len < sizeof(struct iphdr)) {
983	__ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
984	return SKB_DROP_REASON_PKT_TOO_SMALL;
985	}
986
987	if (!pskb_may_pull(skb, len: sizeof(struct iphdr))) {
988	/ there aught to be a stat /
989	return SKB_DROP_REASON_NOMEM;
990	}
991
992	icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
993	return SKB_NOT_DROPPED_YET;
994	}
995
996	/*
997	* Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests.
998	*
999	* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
1000	* requests.
1001	* RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be
1002	* included in the reply.
1003	* RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
1004	* echo requests, MUST have default=NOT.
1005	* RFC 8335: 8 MUST have a config option to enable/disable ICMP
1006	* Extended Echo Functionality, MUST be disabled by default
1007	* See also WRT handling of options once they are done and working.
1008	*/
1009
1010	static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
1011	{
1012	struct icmp_bxm icmp_param;
1013	struct net *net;
1014
1015	net = dev_net_rcu(dev: skb_dst(skb)->dev);
1016	/ should there be an ICMP stat for ignored echos? /
1017	if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
1018	return SKB_NOT_DROPPED_YET;
1019
1020	icmp_param.data.icmph = *icmp_hdr(skb);
1021	icmp_param.skb = skb;
1022	icmp_param.offset = `0`;
1023	icmp_param.data_len = skb->len;
1024	icmp_param.head_len = sizeof(struct icmphdr);
1025
1026	if (icmp_param.data.icmph.type == ICMP_ECHO)
1027	icmp_param.data.icmph.type = ICMP_ECHOREPLY;
1028	else if (!icmp_build_probe(skb, icmphdr: &icmp_param.data.icmph))
1029	return SKB_NOT_DROPPED_YET;
1030
1031	icmp_reply(icmp_param: &icmp_param, skb);
1032	return SKB_NOT_DROPPED_YET;
1033	}
1034
1035	/ Helper for icmp_echo and icmpv6_echo_reply.*
1036	* Searches for net_device that matches PROBE interface identifier
1037	* and builds PROBE reply message in icmphdr.
1038	*
1039	* Returns false if PROBE responses are disabled via sysctl
1040	*/
1041
1042	bool icmp_build_probe(struct sk_buff skb, struct* icmphdr *icmphdr)
1043	{
1044	struct net *net = dev_net_rcu(dev: skb->dev);
1045	struct icmp_ext_hdr *ext_hdr, _ext_hdr;
1046	struct icmp_ext_echo_iio *iio, _iio;
1047	struct inet6_dev *in6_dev;
1048	struct in_device *in_dev;
1049	struct net_device *dev;
1050	char buff[IFNAMSIZ];
1051	u16 ident_len;
1052	u8 status;
1053
1054	if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
1055	return false;
1056
1057	/ We currently only support probing interfaces on the proxy node*
1058	* Check to ensure L-bit is set
1059	*/
1060	if (!(ntohs(icmphdr->un.echo.sequence) & `1`))
1061	return false;
1062	/ Clear status bits in reply message /
1063	icmphdr->un.echo.sequence &= htons(`0xFF00`);
1064	if (icmphdr->type == ICMP_EXT_ECHO)
1065	icmphdr->type = ICMP_EXT_ECHOREPLY;
1066	else
1067	icmphdr->type = ICMPV6_EXT_ECHO_REPLY;
1068	ext_hdr = skb_header_pointer(skb, offset: `0`, len: sizeof(_ext_hdr), buffer: &_ext_hdr);
1069	/ Size of iio is class_type dependent.*
1070	* Only check header here and assign length based on ctype in the switch statement
1071	*/
1072	iio = skb_header_pointer(skb, offset: sizeof(_ext_hdr), len: sizeof(iio->extobj_hdr), buffer: &_iio);
1073	if (!ext_hdr \|\| !iio)
1074	goto send_mal_query;
1075	if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) \|\|
1076	ntohs(iio->extobj_hdr.length) > sizeof(_iio))
1077	goto send_mal_query;
1078	ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
1079	iio = skb_header_pointer(skb, offset: sizeof(_ext_hdr),
1080	len: sizeof(iio->extobj_hdr) + ident_len, buffer: &_iio);
1081	if (!iio)
1082	goto send_mal_query;
1083
1084	status = `0`;
1085	dev = NULL;
1086	switch (iio->extobj_hdr.class_type) {
1087	case ICMP_EXT_ECHO_CTYPE_NAME:
1088	if (ident_len >= IFNAMSIZ)
1089	goto send_mal_query;
1090	memset(buff, `0`, sizeof(buff));
1091	memcpy(buff, &iio->ident.name, ident_len);
1092	dev = dev_get_by_name(net, name: buff);
1093	break;
1094	case ICMP_EXT_ECHO_CTYPE_INDEX:
1095	if (ident_len != sizeof(iio->ident.ifindex))
1096	goto send_mal_query;
1097	dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
1098	break;
1099	case ICMP_EXT_ECHO_CTYPE_ADDR:
1100	if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) \|\|
1101	ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
1102	iio->ident.addr.ctype3_hdr.addrlen)
1103	goto send_mal_query;
1104	switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
1105	case ICMP_AFI_IP:
1106	if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr))
1107	goto send_mal_query;
1108	dev = ip_dev_find(net, addr: iio->ident.addr.ip_addr.ipv4_addr);
1109	break;
1110	#if IS_ENABLED(CONFIG_IPV6)
1111	case ICMP_AFI_IP6:
1112	if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr))
1113	goto send_mal_query;
1114	dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
1115	dev_hold(dev);
1116	break;
1117	#endif
1118	default:
1119	goto send_mal_query;
1120	}
1121	break;
1122	default:
1123	goto send_mal_query;
1124	}
1125	if (!dev) {
1126	icmphdr->code = ICMP_EXT_CODE_NO_IF;
1127	return true;
1128	}
1129	/ Fill bits in reply message /
1130	if (dev->flags & IFF_UP)
1131	status \|= ICMP_EXT_ECHOREPLY_ACTIVE;
1132
1133	in_dev = __in_dev_get_rcu(dev);
1134	if (in_dev && rcu_access_pointer(in_dev->ifa_list))
1135	status \|= ICMP_EXT_ECHOREPLY_IPV4;
1136
1137	in6_dev = __in6_dev_get(dev);
1138	if (in6_dev && !list_empty(head: &in6_dev->addr_list))
1139	status \|= ICMP_EXT_ECHOREPLY_IPV6;
1140
1141	dev_put(dev);
1142	icmphdr->un.echo.sequence \|= htons(status);
1143	return true;
1144	send_mal_query:
1145	icmphdr->code = ICMP_EXT_CODE_MAL_QUERY;
1146	return true;
1147	}
1148	EXPORT_SYMBOL_GPL(icmp_build_probe);
1149
1150	/*
1151	* Handle ICMP Timestamp requests.
1152	* RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests.
1153	* SHOULD be in the kernel for minimum random latency.
1154	* MUST be accurate to a few minutes.
1155	* MUST be updated at least at 15Hz.
1156	*/
1157	static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
1158	{
1159	struct icmp_bxm icmp_param;
1160	/*
1161	* Too short.
1162	*/
1163	if (skb->len < `4`)
1164	goto out_err;
1165
1166	/*
1167	* Fill in the current time as ms since midnight UT:
1168	*/
1169	icmp_param.data.times[`1`] = inet_current_timestamp();
1170	icmp_param.data.times[`2`] = icmp_param.data.times[`1`];
1171
1172	BUG_ON(skb_copy_bits(skb, `0`, &icmp_param.data.times[`0`], `4`));
1173
1174	icmp_param.data.icmph = *icmp_hdr(skb);
1175	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
1176	icmp_param.data.icmph.code = `0`;
1177	icmp_param.skb = skb;
1178	icmp_param.offset = `0`;
1179	icmp_param.data_len = `0`;
1180	icmp_param.head_len = sizeof(struct icmphdr) + `12`;
1181	icmp_reply(icmp_param: &icmp_param, skb);
1182	return SKB_NOT_DROPPED_YET;
1183
1184	out_err:
1185	__ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
1186	return SKB_DROP_REASON_PKT_TOO_SMALL;
1187	}
1188
1189	static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
1190	{
1191	/ pretend it was a success /
1192	return SKB_NOT_DROPPED_YET;
1193	}
1194
1195	/*
1196	* Deal with incoming ICMP packets.
1197	*/
1198	int icmp_rcv(struct sk_buff *skb)
1199	{
1200	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
1201	struct rtable *rt = skb_rtable(skb);
1202	struct net *net = dev_net_rcu(dev: rt->dst.dev);
1203	struct icmphdr *icmph;
1204
1205	if (!xfrm4_policy_check(NULL, dir: XFRM_POLICY_IN, skb)) {
1206	struct sec_path *sp = skb_sec_path(skb);
1207	int nh;
1208
1209	if (!(sp && sp->xvec[sp->len - `1`]->props.flags &
1210	XFRM_STATE_ICMP)) {
1211	reason = SKB_DROP_REASON_XFRM_POLICY;
1212	goto drop;
1213	}
1214
1215	if (!pskb_may_pull(skb, len: sizeof(icmph) + sizeof(struct* iphdr)))
1216	goto drop;
1217
1218	nh = skb_network_offset(skb);
1219	skb_set_network_header(skb, offset: sizeof(*icmph));
1220
1221	if (!xfrm4_policy_check_reverse(NULL, dir: XFRM_POLICY_IN,
1222	skb)) {
1223	reason = SKB_DROP_REASON_XFRM_POLICY;
1224	goto drop;
1225	}
1226
1227	skb_set_network_header(skb, offset: nh);
1228	}
1229
1230	__ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
1231
1232	if (skb_checksum_simple_validate(skb))
1233	goto csum_error;
1234
1235	if (!pskb_pull(skb, len: sizeof(*icmph)))
1236	goto error;
1237
1238	icmph = icmp_hdr(skb);
1239
1240	ICMPMSGIN_INC_STATS(net, icmph->type);
1241
1242	/ Check for ICMP Extended Echo (PROBE) messages /
1243	if (icmph->type == ICMP_EXT_ECHO) {
1244	/ We can't use icmp_pointers[].handler() because it is an array of*
1245	* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
1246	*/
1247	reason = icmp_echo(skb);
1248	goto reason_check;
1249	}
1250
1251	/*
1252	* Parse the ICMP message
1253	*/
1254
1255	if (rt->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST)) {
1256	/*
1257	* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
1258	* silently ignored (we let user decide with a sysctl).
1259	* RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
1260	* discarded if to broadcast/multicast.
1261	*/
1262	if ((icmph->type == ICMP_ECHO \|\|
1263	icmph->type == ICMP_TIMESTAMP) &&
1264	READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
1265	reason = SKB_DROP_REASON_INVALID_PROTO;
1266	goto error;
1267	}
1268	if (icmph->type != ICMP_ECHO &&
1269	icmph->type != ICMP_TIMESTAMP &&
1270	icmph->type != ICMP_ADDRESS &&
1271	icmph->type != ICMP_ADDRESSREPLY) {
1272	reason = SKB_DROP_REASON_INVALID_PROTO;
1273	goto error;
1274	}
1275	}
1276
1277	if (icmph->type == ICMP_EXT_ECHOREPLY \|\|
1278	icmph->type == ICMP_ECHOREPLY) {
1279	reason = ping_rcv(skb);
1280	return reason ? NET_RX_DROP : NET_RX_SUCCESS;
1281	}
1282
1283	/*
1284	* 18 is the highest 'known' ICMP type. Anything else is a mystery
1285	*
1286	* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
1287	* discarded.
1288	*/
1289	if (icmph->type > NR_ICMP_TYPES) {
1290	reason = SKB_DROP_REASON_UNHANDLED_PROTO;
1291	goto error;
1292	}
1293
1294	reason = icmp_pointers[icmph->type].handler(skb);
1295	reason_check:
1296	if (!reason) {
1297	consume_skb(skb);
1298	return NET_RX_SUCCESS;
1299	}
1300
1301	drop:
1302	kfree_skb_reason(skb, reason);
1303	return NET_RX_DROP;
1304	csum_error:
1305	reason = SKB_DROP_REASON_ICMP_CSUM;
1306	__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
1307	error:
1308	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
1309	goto drop;
1310	}
1311
1312	static bool ip_icmp_error_rfc4884_validate(const struct sk_buff skb, int* off)
1313	{
1314	struct icmp_extobj_hdr *objh, _objh;
1315	struct icmp_ext_hdr *exth, _exth;
1316	u16 olen;
1317
1318	exth = skb_header_pointer(skb, offset: off, len: sizeof(_exth), buffer: &_exth);
1319	if (!exth)
1320	return false;
1321	if (exth->version != `2`)
1322	return true;
1323
1324	if (exth->checksum &&
1325	csum_fold(csum: skb_checksum(skb, offset: off, len: skb->len - off, csum: `0`)))
1326	return false;
1327
1328	off += sizeof(_exth);
1329	while (off < skb->len) {
1330	objh = skb_header_pointer(skb, offset: off, len: sizeof(_objh), buffer: &_objh);
1331	if (!objh)
1332	return false;
1333
1334	olen = ntohs(objh->length);
1335	if (olen < sizeof(_objh))
1336	return false;
1337
1338	off += olen;
1339	if (off > skb->len)
1340	return false;
1341	}
1342
1343	return true;
1344	}
1345
1346	void ip_icmp_error_rfc4884(const struct sk_buff *skb,
1347	struct sock_ee_data_rfc4884 *out,
1348	int thlen, int off)
1349	{
1350	int hlen;
1351
1352	/ original datagram headers: end of icmph to payload (skb->data) /
1353	hlen = -skb_transport_offset(skb) - thlen;
1354
1355	/ per rfc 4884: minimal datagram length of 128 bytes /
1356	if (off < `128` \|\| off < hlen)
1357	return;
1358
1359	/ kernel has stripped headers: return payload offset in bytes /
1360	off -= hlen;
1361	if (off + sizeof(struct icmp_ext_hdr) > skb->len)
1362	return;
1363
1364	out->len = off;
1365
1366	if (!ip_icmp_error_rfc4884_validate(skb, off))
1367	out->flags \|= SO_EE_RFC4884_FLAG_INVALID;
1368	}
1369	EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
1370
1371	int icmp_err(struct sk_buff *skb, u32 info)
1372	{
1373	struct iphdr iph = (struct* iphdr *)skb->data;
1374	int offset = iph->ihl<<`2`;
1375	struct icmphdr icmph = (struct* icmphdr *)(skb->data + offset);
1376	struct net *net = dev_net_rcu(dev: skb->dev);
1377	int type = icmp_hdr(skb)->type;
1378	int code = icmp_hdr(skb)->code;
1379
1380	/*
1381	* Use ping_err to handle all icmp errors except those
1382	* triggered by ICMP_ECHOREPLY which sent from kernel.
1383	*/
1384	if (icmph->type != ICMP_ECHOREPLY) {
1385	ping_err(skb, offset, info);
1386	return `0`;
1387	}
1388
1389	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
1390	ipv4_update_pmtu(skb, net, mtu: info, oif: `0`, IPPROTO_ICMP);
1391	else if (type == ICMP_REDIRECT)
1392	ipv4_redirect(skb, net, oif: `0`, IPPROTO_ICMP);
1393
1394	return `0`;
1395	}
1396
1397	/*
1398	* This table is the definition of how we handle ICMP.
1399	*/
1400	static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + `1`] = {
1401	[ICMP_ECHOREPLY] = {
1402	.handler = ping_rcv,
1403	},
1404	[`1`] = {
1405	.handler = icmp_discard,
1406	.error = `1`,
1407	},
1408	[`2`] = {
1409	.handler = icmp_discard,
1410	.error = `1`,
1411	},
1412	[ICMP_DEST_UNREACH] = {
1413	.handler = icmp_unreach,
1414	.error = `1`,
1415	},
1416	[ICMP_SOURCE_QUENCH] = {
1417	.handler = icmp_unreach,
1418	.error = `1`,
1419	},
1420	[ICMP_REDIRECT] = {
1421	.handler = icmp_redirect,
1422	.error = `1`,
1423	},
1424	[`6`] = {
1425	.handler = icmp_discard,
1426	.error = `1`,
1427	},
1428	[`7`] = {
1429	.handler = icmp_discard,
1430	.error = `1`,
1431	},
1432	[ICMP_ECHO] = {
1433	.handler = icmp_echo,
1434	},
1435	[`9`] = {
1436	.handler = icmp_discard,
1437	.error = `1`,
1438	},
1439	[`10`] = {
1440	.handler = icmp_discard,
1441	.error = `1`,
1442	},
1443	[ICMP_TIME_EXCEEDED] = {
1444	.handler = icmp_unreach,
1445	.error = `1`,
1446	},
1447	[ICMP_PARAMETERPROB] = {
1448	.handler = icmp_unreach,
1449	.error = `1`,
1450	},
1451	[ICMP_TIMESTAMP] = {
1452	.handler = icmp_timestamp,
1453	},
1454	[ICMP_TIMESTAMPREPLY] = {
1455	.handler = icmp_discard,
1456	},
1457	[ICMP_INFO_REQUEST] = {
1458	.handler = icmp_discard,
1459	},
1460	[ICMP_INFO_REPLY] = {
1461	.handler = icmp_discard,
1462	},
1463	[ICMP_ADDRESS] = {
1464	.handler = icmp_discard,
1465	},
1466	[ICMP_ADDRESSREPLY] = {
1467	.handler = icmp_discard,
1468	},
1469	};
1470
1471	static int __net_init icmp_sk_init(struct net *net)
1472	{
1473	/ Control parameters for ECHO replies. /
1474	net->ipv4.sysctl_icmp_echo_ignore_all = `0`;
1475	net->ipv4.sysctl_icmp_echo_enable_probe = `0`;
1476	net->ipv4.sysctl_icmp_echo_ignore_broadcasts = `1`;
1477
1478	/ Control parameter - ignore bogus broadcast responses? /
1479	net->ipv4.sysctl_icmp_ignore_bogus_error_responses = `1`;
1480
1481	/*
1482	* Configurable global rate limit.
1483	*
1484	* ratelimit defines tokens/packet consumed for dst->rate_token
1485	* bucket ratemask defines which icmp types are ratelimited by
1486	* setting it's bit position.
1487	*
1488	* default:
1489	* dest unreachable (3), source quench (4),
1490	* time exceeded (11), parameter problem (12)
1491	*/
1492
1493	net->ipv4.sysctl_icmp_ratelimit = `1` * HZ;
1494	net->ipv4.sysctl_icmp_ratemask = `0x1818`;
1495	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = `0`;
1496	net->ipv4.sysctl_icmp_msgs_per_sec = `1000`;
1497	net->ipv4.sysctl_icmp_msgs_burst = `50`;
1498
1499	return `0`;
1500	}
1501
1502	static struct pernet_operations __net_initdata icmp_sk_ops = {
1503	.init = icmp_sk_init,
1504	};
1505
1506	int __init icmp_init(void)
1507	{
1508	int err, i;
1509
1510	for_each_possible_cpu(i) {
1511	struct sock *sk;
1512
1513	err = inet_ctl_sock_create(sk: &sk, PF_INET,
1514	type: SOCK_RAW, IPPROTO_ICMP, net: &init_net);
1515	if (err < `0`)
1516	return err;
1517
1518	per_cpu(ipv4_icmp_sk, i) = sk;
1519
1520	/ Enough space for 2 64K ICMP packets, including*
1521	* sk_buff/skb_shared_info struct overhead.
1522	*/
1523	sk->sk_sndbuf = `2` * SKB_TRUESIZE(`64` * `1024`);
1524
1525	/*
1526	* Speedup sock_wfree()
1527	*/
1528	sock_set_flag(sk, flag: SOCK_USE_WRITE_QUEUE);
1529	inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1530	}
1531	return register_pernet_subsys(&icmp_sk_ops);
1532	}
1533

source code of linux/net/ipv4/icmp.c