1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
4 | * operating system. INET is implemented using the BSD Socket |
5 | * interface as the means of communication with the user level. |
6 | * |
7 | * RAW - implementation of IP "raw" sockets. |
8 | * |
9 | * Authors: Ross Biro |
10 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
11 | * |
12 | * Fixes: |
13 | * Alan Cox : verify_area() fixed up |
14 | * Alan Cox : ICMP error handling |
15 | * Alan Cox : EMSGSIZE if you send too big a packet |
16 | * Alan Cox : Now uses generic datagrams and shared |
17 | * skbuff library. No more peek crashes, |
18 | * no more backlogs |
19 | * Alan Cox : Checks sk->broadcast. |
20 | * Alan Cox : Uses skb_free_datagram/skb_copy_datagram |
21 | * Alan Cox : Raw passes ip options too |
22 | * Alan Cox : Setsocketopt added |
23 | * Alan Cox : Fixed error return for broadcasts |
24 | * Alan Cox : Removed wake_up calls |
25 | * Alan Cox : Use ttl/tos |
26 | * Alan Cox : Cleaned up old debugging |
27 | * Alan Cox : Use new kernel side addresses |
28 | * Arnt Gulbrandsen : Fixed MSG_DONTROUTE in raw sockets. |
29 | * Alan Cox : BSD style RAW socket demultiplexing. |
30 | * Alan Cox : Beginnings of mrouted support. |
31 | * Alan Cox : Added IP_HDRINCL option. |
32 | * Alan Cox : Skip broadcast check if BSDism set. |
33 | * David S. Miller : New socket lookup architecture. |
34 | */ |
35 | |
36 | #include <linux/types.h> |
37 | #include <linux/atomic.h> |
38 | #include <asm/byteorder.h> |
39 | #include <asm/current.h> |
40 | #include <linux/uaccess.h> |
41 | #include <asm/ioctls.h> |
42 | #include <linux/stddef.h> |
43 | #include <linux/slab.h> |
44 | #include <linux/errno.h> |
45 | #include <linux/kernel.h> |
46 | #include <linux/export.h> |
47 | #include <linux/spinlock.h> |
48 | #include <linux/sockios.h> |
49 | #include <linux/socket.h> |
50 | #include <linux/in.h> |
51 | #include <linux/mroute.h> |
52 | #include <linux/netdevice.h> |
53 | #include <linux/in_route.h> |
54 | #include <linux/route.h> |
55 | #include <linux/skbuff.h> |
56 | #include <linux/igmp.h> |
57 | #include <net/net_namespace.h> |
58 | #include <net/dst.h> |
59 | #include <net/sock.h> |
60 | #include <linux/ip.h> |
61 | #include <linux/net.h> |
62 | #include <net/ip.h> |
63 | #include <net/icmp.h> |
64 | #include <net/udp.h> |
65 | #include <net/raw.h> |
66 | #include <net/snmp.h> |
67 | #include <net/tcp_states.h> |
68 | #include <net/inet_common.h> |
69 | #include <net/checksum.h> |
70 | #include <net/xfrm.h> |
71 | #include <linux/rtnetlink.h> |
72 | #include <linux/proc_fs.h> |
73 | #include <linux/seq_file.h> |
74 | #include <linux/netfilter.h> |
75 | #include <linux/netfilter_ipv4.h> |
76 | #include <linux/compat.h> |
77 | #include <linux/uio.h> |
78 | |
/*
 * State handed to raw_getfrag() while a non-HDRINCL datagram is built.
 *
 * @msg:  user message the payload is copied from
 * @hdr:  leading payload bytes already pulled out of @msg by
 *        raw_probe_proto_opt(); readable either as an ICMP header or as
 *        plain bytes
 * @hlen: number of valid bytes cached in @hdr (raw_sendmsg() starts it at 0,
 *        raw_probe_proto_opt() sets it to 2 for ICMP)
 */
struct raw_frag_vec {
	struct msghdr *msg;
	union {
		struct icmphdr icmph;
		char c[1];
	} hdr;
	int hlen;
};
87 | |
/* IPv4 raw socket hash table; buckets are indexed with raw_hashfunc(). */
struct raw_hashinfo raw_v4_hashinfo;
EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
90 | |
91 | int raw_hash_sk(struct sock *sk) |
92 | { |
93 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; |
94 | struct hlist_head *hlist; |
95 | |
96 | hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)]; |
97 | |
98 | spin_lock(lock: &h->lock); |
99 | sk_add_node_rcu(sk, list: hlist); |
100 | sock_set_flag(sk, flag: SOCK_RCU_FREE); |
101 | spin_unlock(lock: &h->lock); |
102 | sock_prot_inuse_add(net: sock_net(sk), prot: sk->sk_prot, val: 1); |
103 | |
104 | return 0; |
105 | } |
106 | EXPORT_SYMBOL_GPL(raw_hash_sk); |
107 | |
108 | void raw_unhash_sk(struct sock *sk) |
109 | { |
110 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; |
111 | |
112 | spin_lock(lock: &h->lock); |
113 | if (sk_del_node_init_rcu(sk)) |
114 | sock_prot_inuse_add(net: sock_net(sk), prot: sk->sk_prot, val: -1); |
115 | spin_unlock(lock: &h->lock); |
116 | } |
117 | EXPORT_SYMBOL_GPL(raw_unhash_sk); |
118 | |
119 | bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, |
120 | __be32 raddr, __be32 laddr, int dif, int sdif) |
121 | { |
122 | const struct inet_sock *inet = inet_sk(sk); |
123 | |
124 | if (net_eq(net1: sock_net(sk), net2: net) && inet->inet_num == num && |
125 | !(inet->inet_daddr && inet->inet_daddr != raddr) && |
126 | !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && |
127 | raw_sk_bound_dev_eq(net, bound_dev_if: sk->sk_bound_dev_if, dif, sdif)) |
128 | return true; |
129 | return false; |
130 | } |
131 | EXPORT_SYMBOL_GPL(raw_v4_match); |
132 | |
133 | /* |
134 | * 0 - deliver |
135 | * 1 - block |
136 | */ |
137 | static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) |
138 | { |
139 | struct icmphdr _hdr; |
140 | const struct icmphdr *hdr; |
141 | |
142 | hdr = skb_header_pointer(skb, offset: skb_transport_offset(skb), |
143 | len: sizeof(_hdr), buffer: &_hdr); |
144 | if (!hdr) |
145 | return 1; |
146 | |
147 | if (hdr->type < 32) { |
148 | __u32 data = raw_sk(sk)->filter.data; |
149 | |
150 | return ((1U << hdr->type) & data) != 0; |
151 | } |
152 | |
153 | /* Do not block unknown ICMP types */ |
154 | return 0; |
155 | } |
156 | |
157 | /* IP input processing comes here for RAW socket delivery. |
158 | * Caller owns SKB, so we must make clones. |
159 | * |
160 | * RFC 1122: SHOULD pass TOS value up to the transport layer. |
161 | * -> It does. And not only TOS, but all IP header. |
162 | */ |
163 | static int raw_v4_input(struct net *net, struct sk_buff *skb, |
164 | const struct iphdr *iph, int hash) |
165 | { |
166 | int sdif = inet_sdif(skb); |
167 | struct hlist_head *hlist; |
168 | int dif = inet_iif(skb); |
169 | int delivered = 0; |
170 | struct sock *sk; |
171 | |
172 | hlist = &raw_v4_hashinfo.ht[hash]; |
173 | rcu_read_lock(); |
174 | sk_for_each_rcu(sk, hlist) { |
175 | if (!raw_v4_match(net, sk, num: iph->protocol, |
176 | raddr: iph->saddr, laddr: iph->daddr, dif, sdif)) |
177 | continue; |
178 | delivered = 1; |
179 | if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && |
180 | ip_mc_sf_allow(sk, local: iph->daddr, rmt: iph->saddr, |
181 | dif: skb->dev->ifindex, sdif)) { |
182 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
183 | |
184 | /* Not releasing hash table! */ |
185 | if (clone) |
186 | raw_rcv(sk, clone); |
187 | } |
188 | } |
189 | rcu_read_unlock(); |
190 | return delivered; |
191 | } |
192 | |
193 | int raw_local_deliver(struct sk_buff *skb, int protocol) |
194 | { |
195 | struct net *net = dev_net(dev: skb->dev); |
196 | |
197 | return raw_v4_input(net, skb, ip_hdr(skb), |
198 | raw_hashfunc(net, protocol)); |
199 | } |
200 | |
201 | static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) |
202 | { |
203 | struct inet_sock *inet = inet_sk(sk); |
204 | const int type = icmp_hdr(skb)->type; |
205 | const int code = icmp_hdr(skb)->code; |
206 | int err = 0; |
207 | int harderr = 0; |
208 | |
209 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) |
210 | ipv4_sk_update_pmtu(skb, sk, mtu: info); |
211 | else if (type == ICMP_REDIRECT) { |
212 | ipv4_sk_redirect(skb, sk); |
213 | return; |
214 | } |
215 | |
216 | /* Report error on raw socket, if: |
217 | 1. User requested ip_recverr. |
218 | 2. Socket is connected (otherwise the error indication |
219 | is useless without ip_recverr and error is hard. |
220 | */ |
221 | if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED) |
222 | return; |
223 | |
224 | switch (type) { |
225 | default: |
226 | case ICMP_TIME_EXCEEDED: |
227 | err = EHOSTUNREACH; |
228 | break; |
229 | case ICMP_SOURCE_QUENCH: |
230 | return; |
231 | case ICMP_PARAMETERPROB: |
232 | err = EPROTO; |
233 | harderr = 1; |
234 | break; |
235 | case ICMP_DEST_UNREACH: |
236 | err = EHOSTUNREACH; |
237 | if (code > NR_ICMP_UNREACH) |
238 | break; |
239 | if (code == ICMP_FRAG_NEEDED) { |
240 | harderr = inet->pmtudisc != IP_PMTUDISC_DONT; |
241 | err = EMSGSIZE; |
242 | } else { |
243 | err = icmp_err_convert[code].errno; |
244 | harderr = icmp_err_convert[code].fatal; |
245 | } |
246 | } |
247 | |
248 | if (inet->recverr) { |
249 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
250 | u8 *payload = skb->data + (iph->ihl << 2); |
251 | |
252 | if (inet->hdrincl) |
253 | payload = skb->data; |
254 | ip_icmp_error(sk, skb, err, 0, info, payload); |
255 | } |
256 | |
257 | if (inet->recverr || harderr) { |
258 | sk->sk_err = err; |
259 | sk_error_report(sk); |
260 | } |
261 | } |
262 | |
263 | void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) |
264 | { |
265 | struct net *net = dev_net(dev: skb->dev); |
266 | int dif = skb->dev->ifindex; |
267 | int sdif = inet_sdif(skb); |
268 | struct hlist_head *hlist; |
269 | const struct iphdr *iph; |
270 | struct sock *sk; |
271 | int hash; |
272 | |
273 | hash = raw_hashfunc(net, protocol); |
274 | hlist = &raw_v4_hashinfo.ht[hash]; |
275 | |
276 | rcu_read_lock(); |
277 | sk_for_each_rcu(sk, hlist) { |
278 | iph = (const struct iphdr *)skb->data; |
279 | if (!raw_v4_match(net, sk, num: iph->protocol, |
280 | raddr: iph->daddr, laddr: iph->saddr, dif, sdif)) |
281 | continue; |
282 | raw_err(sk, skb, info); |
283 | } |
284 | rcu_read_unlock(); |
285 | } |
286 | |
287 | static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) |
288 | { |
289 | enum skb_drop_reason reason; |
290 | |
291 | /* Charge it to the socket. */ |
292 | |
293 | ipv4_pktinfo_prepare(sk, skb); |
294 | if (sock_queue_rcv_skb_reason(sk, skb, reason: &reason) < 0) { |
295 | kfree_skb_reason(skb, reason); |
296 | return NET_RX_DROP; |
297 | } |
298 | |
299 | return NET_RX_SUCCESS; |
300 | } |
301 | |
302 | int raw_rcv(struct sock *sk, struct sk_buff *skb) |
303 | { |
304 | if (!xfrm4_policy_check(sk, dir: XFRM_POLICY_IN, skb)) { |
305 | atomic_inc(v: &sk->sk_drops); |
306 | kfree_skb_reason(skb, reason: SKB_DROP_REASON_XFRM_POLICY); |
307 | return NET_RX_DROP; |
308 | } |
309 | nf_reset_ct(skb); |
310 | |
311 | skb_push(skb, len: skb->data - skb_network_header(skb)); |
312 | |
313 | raw_rcv_skb(sk, skb); |
314 | return 0; |
315 | } |
316 | |
317 | static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, |
318 | struct msghdr *msg, size_t length, |
319 | struct rtable **rtp, unsigned int flags, |
320 | const struct sockcm_cookie *sockc) |
321 | { |
322 | struct inet_sock *inet = inet_sk(sk); |
323 | struct net *net = sock_net(sk); |
324 | struct iphdr *iph; |
325 | struct sk_buff *skb; |
326 | unsigned int iphlen; |
327 | int err; |
328 | struct rtable *rt = *rtp; |
329 | int hlen, tlen; |
330 | |
331 | if (length > rt->dst.dev->mtu) { |
332 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
333 | rt->dst.dev->mtu); |
334 | return -EMSGSIZE; |
335 | } |
336 | if (length < sizeof(struct iphdr)) |
337 | return -EINVAL; |
338 | |
339 | if (flags&MSG_PROBE) |
340 | goto out; |
341 | |
342 | hlen = LL_RESERVED_SPACE(rt->dst.dev); |
343 | tlen = rt->dst.dev->needed_tailroom; |
344 | skb = sock_alloc_send_skb(sk, |
345 | size: length + hlen + tlen + 15, |
346 | noblock: flags & MSG_DONTWAIT, errcode: &err); |
347 | if (!skb) |
348 | goto error; |
349 | skb_reserve(skb, len: hlen); |
350 | |
351 | skb->priority = sk->sk_priority; |
352 | skb->mark = sockc->mark; |
353 | skb->tstamp = sockc->transmit_time; |
354 | skb_dst_set(skb, dst: &rt->dst); |
355 | *rtp = NULL; |
356 | |
357 | skb_reset_network_header(skb); |
358 | iph = ip_hdr(skb); |
359 | skb_put(skb, len: length); |
360 | |
361 | skb->ip_summed = CHECKSUM_NONE; |
362 | |
363 | skb_setup_tx_timestamp(skb, sockc->tsflags); |
364 | |
365 | if (flags & MSG_CONFIRM) |
366 | skb_set_dst_pending_confirm(skb, val: 1); |
367 | |
368 | skb->transport_header = skb->network_header; |
369 | err = -EFAULT; |
370 | if (memcpy_from_msg(data: iph, msg, len: length)) |
371 | goto error_free; |
372 | |
373 | iphlen = iph->ihl * 4; |
374 | |
375 | /* |
376 | * We don't want to modify the ip header, but we do need to |
377 | * be sure that it won't cause problems later along the network |
378 | * stack. Specifically we want to make sure that iph->ihl is a |
379 | * sane value. If ihl points beyond the length of the buffer passed |
380 | * in, reject the frame as invalid |
381 | */ |
382 | err = -EINVAL; |
383 | if (iphlen > length) |
384 | goto error_free; |
385 | |
386 | if (iphlen >= sizeof(*iph)) { |
387 | if (!iph->saddr) |
388 | iph->saddr = fl4->saddr; |
389 | iph->check = 0; |
390 | iph->tot_len = htons(length); |
391 | if (!iph->id) |
392 | ip_select_ident(net, skb, NULL); |
393 | |
394 | iph->check = ip_fast_csum(iph: (unsigned char *)iph, ihl: iph->ihl); |
395 | skb->transport_header += iphlen; |
396 | if (iph->protocol == IPPROTO_ICMP && |
397 | length >= iphlen + sizeof(struct icmphdr)) |
398 | icmp_out_count(net, type: ((struct icmphdr *) |
399 | skb_transport_header(skb))->type); |
400 | } |
401 | |
402 | err = NF_HOOK(pf: NFPROTO_IPV4, hook: NF_INET_LOCAL_OUT, |
403 | net, sk, skb, NULL, out: rt->dst.dev, |
404 | okfn: dst_output); |
405 | if (err > 0) |
406 | err = net_xmit_errno(err); |
407 | if (err) |
408 | goto error; |
409 | out: |
410 | return 0; |
411 | |
412 | error_free: |
413 | kfree_skb(skb); |
414 | error: |
415 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
416 | if (err == -ENOBUFS && !inet->recverr) |
417 | err = 0; |
418 | return err; |
419 | } |
420 | |
421 | static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4) |
422 | { |
423 | int err; |
424 | |
425 | if (fl4->flowi4_proto != IPPROTO_ICMP) |
426 | return 0; |
427 | |
428 | /* We only need the first two bytes. */ |
429 | rfv->hlen = 2; |
430 | |
431 | err = memcpy_from_msg(data: rfv->hdr.c, msg: rfv->msg, len: rfv->hlen); |
432 | if (err) |
433 | return err; |
434 | |
435 | fl4->fl4_icmp_type = rfv->hdr.icmph.type; |
436 | fl4->fl4_icmp_code = rfv->hdr.icmph.code; |
437 | |
438 | return 0; |
439 | } |
440 | |
441 | static int raw_getfrag(void *from, char *to, int offset, int len, int odd, |
442 | struct sk_buff *skb) |
443 | { |
444 | struct raw_frag_vec *rfv = from; |
445 | |
446 | if (offset < rfv->hlen) { |
447 | int copy = min(rfv->hlen - offset, len); |
448 | |
449 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
450 | memcpy(to, from: rfv->hdr.c + offset, len: copy); |
451 | else |
452 | skb->csum = csum_block_add( |
453 | csum: skb->csum, |
454 | csum2: csum_partial_copy_nocheck(src: rfv->hdr.c + offset, |
455 | dst: to, len: copy), |
456 | offset: odd); |
457 | |
458 | odd = 0; |
459 | offset += copy; |
460 | to += copy; |
461 | len -= copy; |
462 | |
463 | if (!len) |
464 | return 0; |
465 | } |
466 | |
467 | offset -= rfv->hlen; |
468 | |
469 | return ip_generic_getfrag(from: rfv->msg, to, offset, len, odd, skb); |
470 | } |
471 | |
472 | static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) |
473 | { |
474 | struct inet_sock *inet = inet_sk(sk); |
475 | struct net *net = sock_net(sk); |
476 | struct ipcm_cookie ipc; |
477 | struct rtable *rt = NULL; |
478 | struct flowi4 fl4; |
479 | int free = 0; |
480 | __be32 daddr; |
481 | __be32 saddr; |
482 | u8 tos; |
483 | int err; |
484 | struct ip_options_data opt_copy; |
485 | struct raw_frag_vec rfv; |
486 | int hdrincl; |
487 | |
488 | err = -EMSGSIZE; |
489 | if (len > 0xFFFF) |
490 | goto out; |
491 | |
492 | /* hdrincl should be READ_ONCE(inet->hdrincl) |
493 | * but READ_ONCE() doesn't work with bit fields. |
494 | * Doing this indirectly yields the same result. |
495 | */ |
496 | hdrincl = inet->hdrincl; |
497 | hdrincl = READ_ONCE(hdrincl); |
498 | /* |
499 | * Check the flags. |
500 | */ |
501 | |
502 | err = -EOPNOTSUPP; |
503 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ |
504 | goto out; /* compatibility */ |
505 | |
506 | /* |
507 | * Get and verify the address. |
508 | */ |
509 | |
510 | if (msg->msg_namelen) { |
511 | DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); |
512 | err = -EINVAL; |
513 | if (msg->msg_namelen < sizeof(*usin)) |
514 | goto out; |
515 | if (usin->sin_family != AF_INET) { |
516 | pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n" , |
517 | __func__, current->comm); |
518 | err = -EAFNOSUPPORT; |
519 | if (usin->sin_family) |
520 | goto out; |
521 | } |
522 | daddr = usin->sin_addr.s_addr; |
523 | /* ANK: I did not forget to get protocol from port field. |
524 | * I just do not know, who uses this weirdness. |
525 | * IP_HDRINCL is much more convenient. |
526 | */ |
527 | } else { |
528 | err = -EDESTADDRREQ; |
529 | if (sk->sk_state != TCP_ESTABLISHED) |
530 | goto out; |
531 | daddr = inet->inet_daddr; |
532 | } |
533 | |
534 | ipcm_init_sk(ipcm: &ipc, inet); |
535 | |
536 | if (msg->msg_controllen) { |
537 | err = ip_cmsg_send(sk, msg, ipc: &ipc, allow_ipv6: false); |
538 | if (unlikely(err)) { |
539 | kfree(objp: ipc.opt); |
540 | goto out; |
541 | } |
542 | if (ipc.opt) |
543 | free = 1; |
544 | } |
545 | |
546 | saddr = ipc.addr; |
547 | ipc.addr = daddr; |
548 | |
549 | if (!ipc.opt) { |
550 | struct ip_options_rcu *inet_opt; |
551 | |
552 | rcu_read_lock(); |
553 | inet_opt = rcu_dereference(inet->inet_opt); |
554 | if (inet_opt) { |
555 | memcpy(to: &opt_copy, from: inet_opt, |
556 | len: sizeof(*inet_opt) + inet_opt->opt.optlen); |
557 | ipc.opt = &opt_copy.opt; |
558 | } |
559 | rcu_read_unlock(); |
560 | } |
561 | |
562 | if (ipc.opt) { |
563 | err = -EINVAL; |
564 | /* Linux does not mangle headers on raw sockets, |
565 | * so that IP options + IP_HDRINCL is non-sense. |
566 | */ |
567 | if (hdrincl) |
568 | goto done; |
569 | if (ipc.opt->opt.srr) { |
570 | if (!daddr) |
571 | goto done; |
572 | daddr = ipc.opt->opt.faddr; |
573 | } |
574 | } |
575 | tos = get_rtconn_flags(&ipc, sk); |
576 | if (msg->msg_flags & MSG_DONTROUTE) |
577 | tos |= RTO_ONLINK; |
578 | |
579 | if (ipv4_is_multicast(addr: daddr)) { |
580 | if (!ipc.oif || netif_index_is_l3_master(net: sock_net(sk), ifindex: ipc.oif)) |
581 | ipc.oif = inet->mc_index; |
582 | if (!saddr) |
583 | saddr = inet->mc_addr; |
584 | } else if (!ipc.oif) { |
585 | ipc.oif = inet->uc_index; |
586 | } else if (ipv4_is_lbcast(addr: daddr) && inet->uc_index) { |
587 | /* oif is set, packet is to local broadcast |
588 | * and uc_index is set. oif is most likely set |
589 | * by sk_bound_dev_if. If uc_index != oif check if the |
590 | * oif is an L3 master and uc_index is an L3 slave. |
591 | * If so, we want to allow the send using the uc_index. |
592 | */ |
593 | if (ipc.oif != inet->uc_index && |
594 | ipc.oif == l3mdev_master_ifindex_by_index(net: sock_net(sk), |
595 | ifindex: inet->uc_index)) { |
596 | ipc.oif = inet->uc_index; |
597 | } |
598 | } |
599 | |
600 | flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, |
601 | RT_SCOPE_UNIVERSE, |
602 | hdrincl ? IPPROTO_RAW : sk->sk_protocol, |
603 | inet_sk_flowi_flags(sk) | |
604 | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), |
605 | daddr, saddr, 0, 0, sk->sk_uid); |
606 | |
607 | if (!hdrincl) { |
608 | rfv.msg = msg; |
609 | rfv.hlen = 0; |
610 | |
611 | err = raw_probe_proto_opt(rfv: &rfv, fl4: &fl4); |
612 | if (err) |
613 | goto done; |
614 | } |
615 | |
616 | security_sk_classify_flow(sk, flic: flowi4_to_flowi_common(fl4: &fl4)); |
617 | rt = ip_route_output_flow(net, flp: &fl4, sk); |
618 | if (IS_ERR(ptr: rt)) { |
619 | err = PTR_ERR(ptr: rt); |
620 | rt = NULL; |
621 | goto done; |
622 | } |
623 | |
624 | err = -EACCES; |
625 | if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, flag: SOCK_BROADCAST)) |
626 | goto done; |
627 | |
628 | if (msg->msg_flags & MSG_CONFIRM) |
629 | goto do_confirm; |
630 | back_from_confirm: |
631 | |
632 | if (hdrincl) |
633 | err = raw_send_hdrinc(sk, fl4: &fl4, msg, length: len, |
634 | rtp: &rt, flags: msg->msg_flags, sockc: &ipc.sockc); |
635 | |
636 | else { |
637 | if (!ipc.addr) |
638 | ipc.addr = fl4.daddr; |
639 | lock_sock(sk); |
640 | err = ip_append_data(sk, fl4: &fl4, getfrag: raw_getfrag, |
641 | from: &rfv, len, protolen: 0, |
642 | ipc: &ipc, rt: &rt, flags: msg->msg_flags); |
643 | if (err) |
644 | ip_flush_pending_frames(sk); |
645 | else if (!(msg->msg_flags & MSG_MORE)) { |
646 | err = ip_push_pending_frames(sk, fl4: &fl4); |
647 | if (err == -ENOBUFS && !inet->recverr) |
648 | err = 0; |
649 | } |
650 | release_sock(sk); |
651 | } |
652 | done: |
653 | if (free) |
654 | kfree(objp: ipc.opt); |
655 | ip_rt_put(rt); |
656 | |
657 | out: |
658 | if (err < 0) |
659 | return err; |
660 | return len; |
661 | |
662 | do_confirm: |
663 | if (msg->msg_flags & MSG_PROBE) |
664 | dst_confirm_neigh(dst: &rt->dst, daddr: &fl4.daddr); |
665 | if (!(msg->msg_flags & MSG_PROBE) || len) |
666 | goto back_from_confirm; |
667 | err = 0; |
668 | goto done; |
669 | } |
670 | |
671 | static void raw_close(struct sock *sk, long timeout) |
672 | { |
673 | /* |
674 | * Raw sockets may have direct kernel references. Kill them. |
675 | */ |
676 | ip_ra_control(sk, on: 0, NULL); |
677 | |
678 | sk_common_release(sk); |
679 | } |
680 | |
/* Destroy hook: discard pending frames while holding the socket lock. */
static void raw_destroy(struct sock *sk)
{
	lock_sock(sk);
	ip_flush_pending_frames(sk);
	release_sock(sk);
}
687 | |
688 | /* This gets rid of all the nasties in af_inet. -DaveM */ |
689 | static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
690 | { |
691 | struct inet_sock *inet = inet_sk(sk); |
692 | struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; |
693 | struct net *net = sock_net(sk); |
694 | u32 tb_id = RT_TABLE_LOCAL; |
695 | int ret = -EINVAL; |
696 | int chk_addr_ret; |
697 | |
698 | lock_sock(sk); |
699 | if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) |
700 | goto out; |
701 | |
702 | if (sk->sk_bound_dev_if) |
703 | tb_id = l3mdev_fib_table_by_index(net, |
704 | sk->sk_bound_dev_if) ? : tb_id; |
705 | |
706 | chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); |
707 | |
708 | ret = -EADDRNOTAVAIL; |
709 | if (!inet_addr_valid_or_nonlocal(net, inet, addr: addr->sin_addr.s_addr, |
710 | addr_type: chk_addr_ret)) |
711 | goto out; |
712 | |
713 | inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; |
714 | if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) |
715 | inet->inet_saddr = 0; /* Use device */ |
716 | sk_dst_reset(sk); |
717 | ret = 0; |
718 | out: |
719 | release_sock(sk); |
720 | return ret; |
721 | } |
722 | |
723 | /* |
724 | * This should be easy, if there is something there |
725 | * we return it, otherwise we block. |
726 | */ |
727 | |
728 | static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, |
729 | int flags, int *addr_len) |
730 | { |
731 | struct inet_sock *inet = inet_sk(sk); |
732 | size_t copied = 0; |
733 | int err = -EOPNOTSUPP; |
734 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); |
735 | struct sk_buff *skb; |
736 | |
737 | if (flags & MSG_OOB) |
738 | goto out; |
739 | |
740 | if (flags & MSG_ERRQUEUE) { |
741 | err = ip_recv_error(sk, msg, len, addr_len); |
742 | goto out; |
743 | } |
744 | |
745 | skb = skb_recv_datagram(sk, flags, err: &err); |
746 | if (!skb) |
747 | goto out; |
748 | |
749 | copied = skb->len; |
750 | if (len < copied) { |
751 | msg->msg_flags |= MSG_TRUNC; |
752 | copied = len; |
753 | } |
754 | |
755 | err = skb_copy_datagram_msg(from: skb, offset: 0, msg, size: copied); |
756 | if (err) |
757 | goto done; |
758 | |
759 | sock_recv_cmsgs(msg, sk, skb); |
760 | |
761 | /* Copy the address. */ |
762 | if (sin) { |
763 | sin->sin_family = AF_INET; |
764 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
765 | sin->sin_port = 0; |
766 | memset(s: &sin->sin_zero, c: 0, n: sizeof(sin->sin_zero)); |
767 | *addr_len = sizeof(*sin); |
768 | } |
769 | if (inet->cmsg_flags) |
770 | ip_cmsg_recv(msg, skb); |
771 | if (flags & MSG_TRUNC) |
772 | copied = skb->len; |
773 | done: |
774 | skb_free_datagram(sk, skb); |
775 | out: |
776 | if (err) |
777 | return err; |
778 | return copied; |
779 | } |
780 | |
781 | static int raw_sk_init(struct sock *sk) |
782 | { |
783 | struct raw_sock *rp = raw_sk(sk); |
784 | |
785 | if (inet_sk(sk)->inet_num == IPPROTO_ICMP) |
786 | memset(s: &rp->filter, c: 0, n: sizeof(rp->filter)); |
787 | return 0; |
788 | } |
789 | |
790 | static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen) |
791 | { |
792 | if (optlen > sizeof(struct icmp_filter)) |
793 | optlen = sizeof(struct icmp_filter); |
794 | if (copy_from_sockptr(&raw_sk(sk)->filter, optval, optlen)) |
795 | return -EFAULT; |
796 | return 0; |
797 | } |
798 | |
799 | static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen) |
800 | { |
801 | int len, ret = -EFAULT; |
802 | |
803 | if (get_user(len, optlen)) |
804 | goto out; |
805 | ret = -EINVAL; |
806 | if (len < 0) |
807 | goto out; |
808 | if (len > sizeof(struct icmp_filter)) |
809 | len = sizeof(struct icmp_filter); |
810 | ret = -EFAULT; |
811 | if (put_user(len, optlen) || |
812 | copy_to_user(to: optval, from: &raw_sk(sk)->filter, n: len)) |
813 | goto out; |
814 | ret = 0; |
815 | out: return ret; |
816 | } |
817 | |
818 | static int do_raw_setsockopt(struct sock *sk, int level, int optname, |
819 | sockptr_t optval, unsigned int optlen) |
820 | { |
821 | if (optname == ICMP_FILTER) { |
822 | if (inet_sk(sk)->inet_num != IPPROTO_ICMP) |
823 | return -EOPNOTSUPP; |
824 | else |
825 | return raw_seticmpfilter(sk, optval, optlen); |
826 | } |
827 | return -ENOPROTOOPT; |
828 | } |
829 | |
830 | static int raw_setsockopt(struct sock *sk, int level, int optname, |
831 | sockptr_t optval, unsigned int optlen) |
832 | { |
833 | if (level != SOL_RAW) |
834 | return ip_setsockopt(sk, level, optname, optval, optlen); |
835 | return do_raw_setsockopt(sk, level, optname, optval, optlen); |
836 | } |
837 | |
838 | static int do_raw_getsockopt(struct sock *sk, int level, int optname, |
839 | char __user *optval, int __user *optlen) |
840 | { |
841 | if (optname == ICMP_FILTER) { |
842 | if (inet_sk(sk)->inet_num != IPPROTO_ICMP) |
843 | return -EOPNOTSUPP; |
844 | else |
845 | return raw_geticmpfilter(sk, optval, optlen); |
846 | } |
847 | return -ENOPROTOOPT; |
848 | } |
849 | |
850 | static int raw_getsockopt(struct sock *sk, int level, int optname, |
851 | char __user *optval, int __user *optlen) |
852 | { |
853 | if (level != SOL_RAW) |
854 | return ip_getsockopt(sk, level, optname, optval, optlen); |
855 | return do_raw_getsockopt(sk, level, optname, optval, optlen); |
856 | } |
857 | |
858 | static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) |
859 | { |
860 | switch (cmd) { |
861 | case SIOCOUTQ: { |
862 | int amount = sk_wmem_alloc_get(sk); |
863 | |
864 | return put_user(amount, (int __user *)arg); |
865 | } |
866 | case SIOCINQ: { |
867 | struct sk_buff *skb; |
868 | int amount = 0; |
869 | |
870 | spin_lock_bh(lock: &sk->sk_receive_queue.lock); |
871 | skb = skb_peek(list_: &sk->sk_receive_queue); |
872 | if (skb) |
873 | amount = skb->len; |
874 | spin_unlock_bh(lock: &sk->sk_receive_queue.lock); |
875 | return put_user(amount, (int __user *)arg); |
876 | } |
877 | |
878 | default: |
879 | #ifdef CONFIG_IP_MROUTE |
880 | return ipmr_ioctl(sk, cmd, (void __user *)arg); |
881 | #else |
882 | return -ENOIOCTLCMD; |
883 | #endif |
884 | } |
885 | } |
886 | |
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl handler.  SIOCOUTQ and SIOCINQ return -ENOIOCTLCMD here;
 * other commands go to ipmr_compat_ioctl() when CONFIG_IP_MROUTE is set.
 */
static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	if (cmd == SIOCOUTQ || cmd == SIOCINQ)
		return -ENOIOCTLCMD;

#ifdef CONFIG_IP_MROUTE
	return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
#else
	return -ENOIOCTLCMD;
#endif
}
#endif
903 | |
904 | int raw_abort(struct sock *sk, int err) |
905 | { |
906 | lock_sock(sk); |
907 | |
908 | sk->sk_err = err; |
909 | sk_error_report(sk); |
910 | __udp_disconnect(sk, flags: 0); |
911 | |
912 | release_sock(sk); |
913 | |
914 | return 0; |
915 | } |
916 | EXPORT_SYMBOL_GPL(raw_abort); |
917 | |
918 | struct proto raw_prot = { |
919 | .name = "RAW" , |
920 | .owner = THIS_MODULE, |
921 | .close = raw_close, |
922 | .destroy = raw_destroy, |
923 | .connect = ip4_datagram_connect, |
924 | .disconnect = __udp_disconnect, |
925 | .ioctl = raw_ioctl, |
926 | .init = raw_sk_init, |
927 | .setsockopt = raw_setsockopt, |
928 | .getsockopt = raw_getsockopt, |
929 | .sendmsg = raw_sendmsg, |
930 | .recvmsg = raw_recvmsg, |
931 | .bind = raw_bind, |
932 | .backlog_rcv = raw_rcv_skb, |
933 | .release_cb = ip4_datagram_release_cb, |
934 | .hash = raw_hash_sk, |
935 | .unhash = raw_unhash_sk, |
936 | .obj_size = sizeof(struct raw_sock), |
937 | .useroffset = offsetof(struct raw_sock, filter), |
938 | .usersize = sizeof_field(struct raw_sock, filter), |
939 | .h.raw_hash = &raw_v4_hashinfo, |
940 | #ifdef CONFIG_COMPAT |
941 | .compat_ioctl = compat_raw_ioctl, |
942 | #endif |
943 | .diag_destroy = raw_abort, |
944 | }; |
945 | |
946 | #ifdef CONFIG_PROC_FS |
947 | static struct sock *raw_get_first(struct seq_file *seq, int bucket) |
948 | { |
949 | struct raw_hashinfo *h = pde_data(file_inode(seq->file)); |
950 | struct raw_iter_state *state = raw_seq_private(seq); |
951 | struct hlist_head *hlist; |
952 | struct sock *sk; |
953 | |
954 | for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; |
955 | ++state->bucket) { |
956 | hlist = &h->ht[state->bucket]; |
957 | sk_for_each(sk, hlist) { |
958 | if (sock_net(sk) == seq_file_net(seq)) |
959 | return sk; |
960 | } |
961 | } |
962 | return NULL; |
963 | } |
964 | |
965 | static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) |
966 | { |
967 | struct raw_iter_state *state = raw_seq_private(seq); |
968 | |
969 | do { |
970 | sk = sk_next(sk); |
971 | } while (sk && sock_net(sk) != seq_file_net(seq)); |
972 | |
973 | if (!sk) |
974 | return raw_get_first(seq, state->bucket + 1); |
975 | return sk; |
976 | } |
977 | |
978 | static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) |
979 | { |
980 | struct sock *sk = raw_get_first(seq, 0); |
981 | |
982 | if (sk) |
983 | while (pos && (sk = raw_get_next(seq, sk)) != NULL) |
984 | --pos; |
985 | return pos ? NULL : sk; |
986 | } |
987 | |
988 | void *raw_seq_start(struct seq_file *seq, loff_t *pos) |
989 | __acquires(&h->lock) |
990 | { |
991 | struct raw_hashinfo *h = pde_data(file_inode(seq->file)); |
992 | |
993 | spin_lock(&h->lock); |
994 | |
995 | return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
996 | } |
997 | EXPORT_SYMBOL_GPL(raw_seq_start); |
998 | |
999 | void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
1000 | { |
1001 | struct sock *sk; |
1002 | |
1003 | if (v == SEQ_START_TOKEN) |
1004 | sk = raw_get_first(seq, 0); |
1005 | else |
1006 | sk = raw_get_next(seq, v); |
1007 | ++*pos; |
1008 | return sk; |
1009 | } |
1010 | EXPORT_SYMBOL_GPL(raw_seq_next); |
1011 | |
1012 | void raw_seq_stop(struct seq_file *seq, void *v) |
1013 | __releases(&h->lock) |
1014 | { |
1015 | struct raw_hashinfo *h = pde_data(file_inode(seq->file)); |
1016 | |
1017 | spin_unlock(&h->lock); |
1018 | } |
1019 | EXPORT_SYMBOL_GPL(raw_seq_stop); |
1020 | |
1021 | static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) |
1022 | { |
1023 | struct inet_sock *inet = inet_sk(sp); |
1024 | __be32 dest = inet->inet_daddr, |
1025 | src = inet->inet_rcv_saddr; |
1026 | __u16 destp = 0, |
1027 | srcp = inet->inet_num; |
1028 | |
1029 | seq_printf(seq, "%4d: %08X:%04X %08X:%04X" |
1030 | " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n" , |
1031 | i, src, srcp, dest, destp, sp->sk_state, |
1032 | sk_wmem_alloc_get(sp), |
1033 | sk_rmem_alloc_get(sp), |
1034 | 0, 0L, 0, |
1035 | from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), |
1036 | 0, sock_i_ino(sp), |
1037 | refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); |
1038 | } |
1039 | |
1040 | static int raw_seq_show(struct seq_file *seq, void *v) |
1041 | { |
1042 | if (v == SEQ_START_TOKEN) |
1043 | seq_printf(seq, " sl local_address rem_address st tx_queue " |
1044 | "rx_queue tr tm->when retrnsmt uid timeout " |
1045 | "inode ref pointer drops\n" ); |
1046 | else |
1047 | raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); |
1048 | return 0; |
1049 | } |
1050 | |
1051 | static const struct seq_operations raw_seq_ops = { |
1052 | .start = raw_seq_start, |
1053 | .next = raw_seq_next, |
1054 | .stop = raw_seq_stop, |
1055 | .show = raw_seq_show, |
1056 | }; |
1057 | |
1058 | static __net_init int raw_init_net(struct net *net) |
1059 | { |
1060 | if (!proc_create_net_data("raw" , 0444, net->proc_net, &raw_seq_ops, |
1061 | sizeof(struct raw_iter_state), &raw_v4_hashinfo)) |
1062 | return -ENOMEM; |
1063 | |
1064 | return 0; |
1065 | } |
1066 | |
1067 | static __net_exit void raw_exit_net(struct net *net) |
1068 | { |
1069 | remove_proc_entry("raw" , net->proc_net); |
1070 | } |
1071 | |
1072 | static __net_initdata struct pernet_operations raw_net_ops = { |
1073 | .init = raw_init_net, |
1074 | .exit = raw_exit_net, |
1075 | }; |
1076 | |
1077 | int __init raw_proc_init(void) |
1078 | { |
1079 | |
1080 | return register_pernet_subsys(&raw_net_ops); |
1081 | } |
1082 | |
1083 | void __init raw_proc_exit(void) |
1084 | { |
1085 | unregister_pernet_subsys(&raw_net_ops); |
1086 | } |
1087 | #endif /* CONFIG_PROC_FS */ |
1088 | |
/*
 * Set the raw sysctl defaults for @net: sysctl_raw_l3mdev_accept is
 * enabled when CONFIG_NET_L3_MASTER_DEV is built in; otherwise this is a
 * no-op.
 */
static void raw_sysctl_init_net(struct net *net)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	net->ipv4.sysctl_raw_l3mdev_accept = 1;
#endif
}
1095 | |
1096 | static int __net_init raw_sysctl_init(struct net *net) |
1097 | { |
1098 | raw_sysctl_init_net(net); |
1099 | return 0; |
1100 | } |
1101 | |
1102 | static struct pernet_operations __net_initdata raw_sysctl_ops = { |
1103 | .init = raw_sysctl_init, |
1104 | }; |
1105 | |
1106 | void __init raw_init(void) |
1107 | { |
1108 | raw_sysctl_init_net(net: &init_net); |
1109 | if (register_pernet_subsys(&raw_sysctl_ops)) |
1110 | panic(fmt: "RAW: failed to init sysctl parameters.\n" ); |
1111 | } |
1112 | |