1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef __NET_IP_TUNNELS_H |
3 | #define __NET_IP_TUNNELS_H 1 |
4 | |
5 | #include <linux/if_tunnel.h> |
6 | #include <linux/netdevice.h> |
7 | #include <linux/skbuff.h> |
8 | #include <linux/socket.h> |
9 | #include <linux/types.h> |
10 | #include <linux/u64_stats_sync.h> |
11 | #include <linux/bitops.h> |
12 | |
13 | #include <net/dsfield.h> |
14 | #include <net/gro_cells.h> |
15 | #include <net/inet_ecn.h> |
16 | #include <net/netns/generic.h> |
17 | #include <net/rtnetlink.h> |
18 | #include <net/lwtunnel.h> |
19 | #include <net/dst_cache.h> |
20 | |
21 | #if IS_ENABLED(CONFIG_IPV6) |
22 | #include <net/ipv6.h> |
23 | #include <net/ip6_fib.h> |
24 | #include <net/ip6_route.h> |
25 | #endif |
26 | |
/* Error state is kept on a tunnel for 30 seconds (expressed in HZ ticks). */
#define IPTUNNEL_ERR_TIMEO	(30 * HZ)
29 | |
/*
 * Size of struct ip_tunnel_key up to and including tp_dst.  Bytes past this
 * offset are the ones ip_tunnel_key_init() clears with memset().
 */
#define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst)

/*
 * Offset and length of the part of the address union 'u' that is not
 * covered by the IPv4 src/dst pair; used to memset that region.
 */
#define IP_TUNNEL_KEY_IPV4_PAD	offsetofend(struct ip_tunnel_key, u.ipv4.dst)
#define IP_TUNNEL_KEY_IPV4_PAD_LEN					\
	(sizeof_field(struct ip_tunnel_key, u) -			\
	 sizeof_field(struct ip_tunnel_key, u.ipv4))
38 | |
39 | struct ip_tunnel_key { |
40 | __be64 tun_id; |
41 | union { |
42 | struct { |
43 | __be32 src; |
44 | __be32 dst; |
45 | } ipv4; |
46 | struct { |
47 | struct in6_addr src; |
48 | struct in6_addr dst; |
49 | } ipv6; |
50 | } u; |
51 | __be16 tun_flags; |
52 | u8 tos; /* TOS for IPv4, TC for IPv6 */ |
53 | u8 ttl; /* TTL for IPv4, HL for IPv6 */ |
54 | __be32 label; /* Flow Label for IPv6 */ |
55 | __be16 tp_src; |
56 | __be16 tp_dst; |
57 | __u8 flow_flags; |
58 | }; |
59 | |
/* Bit flags stored in ip_tunnel_info.mode. */
#define IP_TUNNEL_INFO_TX	0x01	/* describes tx tunnel parameters */
#define IP_TUNNEL_INFO_IPV6	0x02	/* key holds IPv6 addresses */
#define IP_TUNNEL_INFO_BRIDGE	0x04	/* describes a bridged tunnel id */
64 | |
/*
 * Maximum tunnel options length: a mask with every bit of
 * ip_tunnel_info.options_len set, i.e. the largest value that field holds.
 */
#define IP_TUNNEL_OPTS_MAX						\
	GENMASK((sizeof_field(struct ip_tunnel_info,			\
			      options_len) * BITS_PER_BYTE) - 1, 0)
69 | |
70 | struct ip_tunnel_info { |
71 | struct ip_tunnel_key key; |
72 | #ifdef CONFIG_DST_CACHE |
73 | struct dst_cache dst_cache; |
74 | #endif |
75 | u8 options_len; |
76 | u8 mode; |
77 | }; |
78 | |
#ifdef CONFIG_IPV6_SIT_6RD
/* 6rd prefix/relay information (only present with CONFIG_IPV6_SIT_6RD). */
struct ip_tunnel_6rd_parm {
	struct in6_addr		prefix;
	__be32			relay_prefix;
	u16			prefixlen;
	u16			relay_prefixlen;
};
#endif
88 | |
89 | struct ip_tunnel_encap { |
90 | u16 type; |
91 | u16 flags; |
92 | __be16 sport; |
93 | __be16 dport; |
94 | }; |
95 | |
96 | struct ip_tunnel_prl_entry { |
97 | struct ip_tunnel_prl_entry __rcu *next; |
98 | __be32 addr; |
99 | u16 flags; |
100 | struct rcu_head rcu_head; |
101 | }; |
102 | |
103 | struct metadata_dst; |
104 | |
105 | struct ip_tunnel { |
106 | struct ip_tunnel __rcu *next; |
107 | struct hlist_node hash_node; |
108 | |
109 | struct net_device *dev; |
110 | netdevice_tracker dev_tracker; |
111 | |
112 | struct net *net; /* netns for packet i/o */ |
113 | |
114 | unsigned long err_time; /* Time when the last ICMP error |
115 | * arrived */ |
116 | int err_count; /* Number of arrived ICMP errors */ |
117 | |
118 | /* These four fields used only by GRE */ |
119 | u32 i_seqno; /* The last seen seqno */ |
120 | atomic_t o_seqno; /* The last output seqno */ |
121 | int tun_hlen; /* Precalculated header length */ |
122 | |
123 | /* These four fields used only by ERSPAN */ |
124 | u32 index; /* ERSPAN type II index */ |
125 | u8 erspan_ver; /* ERSPAN version */ |
126 | u8 dir; /* ERSPAN direction */ |
127 | u16 hwid; /* ERSPAN hardware ID */ |
128 | |
129 | struct dst_cache dst_cache; |
130 | |
131 | struct ip_tunnel_parm parms; |
132 | |
133 | int mlink; |
134 | int encap_hlen; /* Encap header length (FOU,GUE) */ |
135 | int hlen; /* tun_hlen + encap_hlen */ |
136 | struct ip_tunnel_encap encap; |
137 | |
138 | /* for SIT */ |
139 | #ifdef CONFIG_IPV6_SIT_6RD |
140 | struct ip_tunnel_6rd_parm ip6rd; |
141 | #endif |
142 | struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ |
143 | unsigned int prl_count; /* # of entries in PRL */ |
144 | unsigned int ip_tnl_net_id; |
145 | struct gro_cells gro_cells; |
146 | __u32 fwmark; |
147 | bool collect_md; |
148 | bool ignore_df; |
149 | }; |
150 | |
151 | struct tnl_ptk_info { |
152 | __be16 flags; |
153 | __be16 proto; |
154 | __be32 key; |
155 | __be32 seq; |
156 | int hdr_len; |
157 | }; |
158 | |
/* NOTE(review): the users of these PACKET_* codes are outside this header. */
#define PACKET_RCVD	0
#define PACKET_REJECT	1
#define PACKET_NEXT	2

/* ip_tunnel_net.tunnels[] has 2^IP_TNL_HASH_BITS buckets. */
#define IP_TNL_HASH_BITS	7
#define IP_TNL_HASH_SIZE	(1 << IP_TNL_HASH_BITS)
165 | |
166 | struct ip_tunnel_net { |
167 | struct net_device *fb_tunnel_dev; |
168 | struct rtnl_link_ops *rtnl_link_ops; |
169 | struct hlist_head tunnels[IP_TNL_HASH_SIZE]; |
170 | struct ip_tunnel __rcu *collect_md_tun; |
171 | int type; |
172 | }; |
173 | |
174 | static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, |
175 | __be32 saddr, __be32 daddr, |
176 | u8 tos, u8 ttl, __be32 label, |
177 | __be16 tp_src, __be16 tp_dst, |
178 | __be64 tun_id, __be16 tun_flags) |
179 | { |
180 | key->tun_id = tun_id; |
181 | key->u.ipv4.src = saddr; |
182 | key->u.ipv4.dst = daddr; |
183 | memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, |
184 | 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); |
185 | key->tos = tos; |
186 | key->ttl = ttl; |
187 | key->label = label; |
188 | key->tun_flags = tun_flags; |
189 | |
190 | /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of |
191 | * the upper tunnel are used. |
192 | * E.g: GRE over IPSEC, the tp_src and tp_port are zero. |
193 | */ |
194 | key->tp_src = tp_src; |
195 | key->tp_dst = tp_dst; |
196 | |
197 | /* Clear struct padding. */ |
198 | if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) |
199 | memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, |
200 | 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); |
201 | } |
202 | |
203 | static inline bool |
204 | ip_tunnel_dst_cache_usable(const struct sk_buff *skb, |
205 | const struct ip_tunnel_info *info) |
206 | { |
207 | if (skb->mark) |
208 | return false; |
209 | if (!info) |
210 | return true; |
211 | if (info->key.tun_flags & TUNNEL_NOCACHE) |
212 | return false; |
213 | |
214 | return true; |
215 | } |
216 | |
217 | static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info |
218 | *tun_info) |
219 | { |
220 | return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; |
221 | } |
222 | |
223 | static inline __be64 key32_to_tunnel_id(__be32 key) |
224 | { |
225 | #ifdef __BIG_ENDIAN |
226 | return (__force __be64)key; |
227 | #else |
228 | return (__force __be64)((__force u64)key << 32); |
229 | #endif |
230 | } |
231 | |
232 | /* Returns the least-significant 32 bits of a __be64. */ |
233 | static inline __be32 tunnel_id_to_key32(__be64 tun_id) |
234 | { |
235 | #ifdef __BIG_ENDIAN |
236 | return (__force __be32)tun_id; |
237 | #else |
238 | return (__force __be32)((__force u64)tun_id >> 32); |
239 | #endif |
240 | } |
241 | |
242 | #ifdef CONFIG_INET |
243 | |
244 | static inline void ip_tunnel_init_flow(struct flowi4 *fl4, |
245 | int proto, |
246 | __be32 daddr, __be32 saddr, |
247 | __be32 key, __u8 tos, |
248 | struct net *net, int oif, |
249 | __u32 mark, __u32 tun_inner_hash) |
250 | { |
251 | memset(fl4, 0, sizeof(*fl4)); |
252 | |
253 | if (oif) { |
254 | fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif); |
255 | /* Legacy VRF/l3mdev use case */ |
256 | fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif; |
257 | } |
258 | |
259 | fl4->daddr = daddr; |
260 | fl4->saddr = saddr; |
261 | fl4->flowi4_tos = tos; |
262 | fl4->flowi4_proto = proto; |
263 | fl4->fl4_gre_key = key; |
264 | fl4->flowi4_mark = mark; |
265 | fl4->flowi4_multipath_hash = tun_inner_hash; |
266 | } |
267 | |
268 | int ip_tunnel_init(struct net_device *dev); |
269 | void ip_tunnel_uninit(struct net_device *dev); |
270 | void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); |
271 | struct net *ip_tunnel_get_link_net(const struct net_device *dev); |
272 | int ip_tunnel_get_iflink(const struct net_device *dev); |
273 | int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, |
274 | struct rtnl_link_ops *ops, char *devname); |
275 | |
276 | void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, |
277 | struct rtnl_link_ops *ops); |
278 | |
279 | void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
280 | const struct iphdr *tnl_params, const u8 protocol); |
281 | void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
282 | const u8 proto, int tunnel_hlen); |
283 | int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); |
284 | int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, |
285 | void __user *data, int cmd); |
286 | int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); |
287 | int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); |
288 | |
289 | struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, |
290 | int link, __be16 flags, |
291 | __be32 remote, __be32 local, |
292 | __be32 key); |
293 | |
294 | int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, |
295 | const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, |
296 | bool log_ecn_error); |
297 | int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], |
298 | struct ip_tunnel_parm *p, __u32 fwmark); |
299 | int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], |
300 | struct ip_tunnel_parm *p, __u32 fwmark); |
301 | void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); |
302 | |
303 | extern const struct header_ops ip_tunnel_header_ops; |
304 | __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); |
305 | |
306 | struct ip_tunnel_encap_ops { |
307 | size_t (*encap_hlen)(struct ip_tunnel_encap *e); |
308 | int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, |
309 | u8 *protocol, struct flowi4 *fl4); |
310 | int (*err_handler)(struct sk_buff *skb, u32 info); |
311 | }; |
312 | |
313 | #define MAX_IPTUN_ENCAP_OPS 8 |
314 | |
315 | extern const struct ip_tunnel_encap_ops __rcu * |
316 | iptun_encaps[MAX_IPTUN_ENCAP_OPS]; |
317 | |
318 | int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, |
319 | unsigned int num); |
320 | int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, |
321 | unsigned int num); |
322 | |
323 | int ip_tunnel_encap_setup(struct ip_tunnel *t, |
324 | struct ip_tunnel_encap *ipencap); |
325 | |
326 | static inline bool pskb_inet_may_pull(struct sk_buff *skb) |
327 | { |
328 | int nhlen; |
329 | |
330 | switch (skb->protocol) { |
331 | #if IS_ENABLED(CONFIG_IPV6) |
332 | case htons(ETH_P_IPV6): |
333 | nhlen = sizeof(struct ipv6hdr); |
334 | break; |
335 | #endif |
336 | case htons(ETH_P_IP): |
337 | nhlen = sizeof(struct iphdr); |
338 | break; |
339 | default: |
340 | nhlen = 0; |
341 | } |
342 | |
343 | return pskb_network_may_pull(skb, nhlen); |
344 | } |
345 | |
346 | static inline int ip_encap_hlen(struct ip_tunnel_encap *e) |
347 | { |
348 | const struct ip_tunnel_encap_ops *ops; |
349 | int hlen = -EINVAL; |
350 | |
351 | if (e->type == TUNNEL_ENCAP_NONE) |
352 | return 0; |
353 | |
354 | if (e->type >= MAX_IPTUN_ENCAP_OPS) |
355 | return -EINVAL; |
356 | |
357 | rcu_read_lock(); |
358 | ops = rcu_dereference(iptun_encaps[e->type]); |
359 | if (likely(ops && ops->encap_hlen)) |
360 | hlen = ops->encap_hlen(e); |
361 | rcu_read_unlock(); |
362 | |
363 | return hlen; |
364 | } |
365 | |
366 | static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, |
367 | u8 *protocol, struct flowi4 *fl4) |
368 | { |
369 | const struct ip_tunnel_encap_ops *ops; |
370 | int ret = -EINVAL; |
371 | |
372 | if (t->encap.type == TUNNEL_ENCAP_NONE) |
373 | return 0; |
374 | |
375 | if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) |
376 | return -EINVAL; |
377 | |
378 | rcu_read_lock(); |
379 | ops = rcu_dereference(iptun_encaps[t->encap.type]); |
380 | if (likely(ops && ops->build_header)) |
381 | ret = ops->build_header(skb, &t->encap, protocol, fl4); |
382 | rcu_read_unlock(); |
383 | |
384 | return ret; |
385 | } |
386 | |
387 | /* Extract dsfield from inner protocol */ |
388 | static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, |
389 | const struct sk_buff *skb) |
390 | { |
391 | __be16 payload_protocol = skb_protocol(skb, true); |
392 | |
393 | if (payload_protocol == htons(ETH_P_IP)) |
394 | return iph->tos; |
395 | else if (payload_protocol == htons(ETH_P_IPV6)) |
396 | return ipv6_get_dsfield((const struct ipv6hdr *)iph); |
397 | else |
398 | return 0; |
399 | } |
400 | |
401 | static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, |
402 | const struct sk_buff *skb) |
403 | { |
404 | __be16 payload_protocol = skb_protocol(skb, true); |
405 | |
406 | if (payload_protocol == htons(ETH_P_IP)) |
407 | return iph->ttl; |
408 | else if (payload_protocol == htons(ETH_P_IPV6)) |
409 | return ((const struct ipv6hdr *)iph)->hop_limit; |
410 | else |
411 | return 0; |
412 | } |
413 | |
414 | /* Propogate ECN bits out */ |
415 | static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, |
416 | const struct sk_buff *skb) |
417 | { |
418 | u8 inner = ip_tunnel_get_dsfield(iph, skb); |
419 | |
420 | return INET_ECN_encapsulate(tos, inner); |
421 | } |
422 | |
423 | int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, |
424 | __be16 inner_proto, bool raw_proto, bool xnet); |
425 | |
426 | static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, |
427 | __be16 inner_proto, bool xnet) |
428 | { |
429 | return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); |
430 | } |
431 | |
432 | void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, |
433 | __be32 src, __be32 dst, u8 proto, |
434 | u8 tos, u8 ttl, __be16 df, bool xnet); |
435 | struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, |
436 | gfp_t flags); |
437 | int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, |
438 | int headroom, bool reply); |
439 | |
440 | int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); |
441 | |
442 | static inline int iptunnel_pull_offloads(struct sk_buff *skb) |
443 | { |
444 | if (skb_is_gso(skb)) { |
445 | int err; |
446 | |
447 | err = skb_unclone(skb, GFP_ATOMIC); |
448 | if (unlikely(err)) |
449 | return err; |
450 | skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> |
451 | NETIF_F_GSO_SHIFT); |
452 | } |
453 | |
454 | skb->encapsulation = 0; |
455 | return 0; |
456 | } |
457 | |
458 | static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) |
459 | { |
460 | if (pkt_len > 0) { |
461 | struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); |
462 | |
463 | u64_stats_update_begin(&tstats->syncp); |
464 | u64_stats_add(&tstats->tx_bytes, pkt_len); |
465 | u64_stats_inc(&tstats->tx_packets); |
466 | u64_stats_update_end(&tstats->syncp); |
467 | put_cpu_ptr(tstats); |
468 | } else { |
469 | struct net_device_stats *err_stats = &dev->stats; |
470 | |
471 | if (pkt_len < 0) { |
472 | err_stats->tx_errors++; |
473 | err_stats->tx_aborted_errors++; |
474 | } else { |
475 | err_stats->tx_dropped++; |
476 | } |
477 | } |
478 | } |
479 | |
480 | static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info) |
481 | { |
482 | return info + 1; |
483 | } |
484 | |
485 | static inline void ip_tunnel_info_opts_get(void *to, |
486 | const struct ip_tunnel_info *info) |
487 | { |
488 | memcpy(to, info + 1, info->options_len); |
489 | } |
490 | |
491 | static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, |
492 | const void *from, int len, |
493 | __be16 flags) |
494 | { |
495 | info->options_len = len; |
496 | if (len > 0) { |
497 | memcpy(ip_tunnel_info_opts(info), from, len); |
498 | info->key.tun_flags |= flags; |
499 | } |
500 | } |
501 | |
502 | static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) |
503 | { |
504 | return (struct ip_tunnel_info *)lwtstate->data; |
505 | } |
506 | |
507 | DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); |
508 | |
509 | /* Returns > 0 if metadata should be collected */ |
510 | static inline int ip_tunnel_collect_metadata(void) |
511 | { |
512 | return static_branch_unlikely(&ip_tunnel_metadata_cnt); |
513 | } |
514 | |
515 | void __init ip_tunnel_core_init(void); |
516 | |
517 | void ip_tunnel_need_metadata(void); |
518 | void ip_tunnel_unneed_metadata(void); |
519 | |
520 | #else /* CONFIG_INET */ |
521 | |
/* !CONFIG_INET stub: there is no tunnel info, so always return NULL. */
static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
{
	struct ip_tunnel_info *none = NULL;

	return none;
}
526 | |
/* !CONFIG_INET stub: does nothing. */
static inline void ip_tunnel_need_metadata(void) { }
530 | |
/* !CONFIG_INET stub: does nothing. */
static inline void ip_tunnel_unneed_metadata(void) { }
534 | |
/* !CONFIG_INET stub: copies nothing; @to is left untouched. */
static inline void ip_tunnel_info_opts_get(void *to,
					   const struct ip_tunnel_info *info) { }
539 | |
540 | static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, |
541 | const void *from, int len, |
542 | __be16 flags) |
543 | { |
544 | info->options_len = 0; |
545 | } |
546 | |
547 | #endif /* CONFIG_INET */ |
548 | |
549 | #endif /* __NET_IP_TUNNELS_H */ |
550 | |