// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "socket.h"
#include "timers.h"
#include "device.h"
#include "ratelimiter.h"
#include "peer.h"
#include "messages.h"

#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <linux/suspend.h>
#include <net/dst_metadata.h>
#include <net/gso.h>
#include <net/icmp.h>
#include <net/rtnetlink.h>
#include <net/ip_tunnels.h>
#include <net/addrconf.h>

static LIST_HEAD(device_list);

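/* ndo_open handler: disable ICMP redirects and IPv6 address generation on
 * the interface, bring up the UDP socket on the configured listen port,
 * then flush packets staged while the device was down and send keepalives
 * for peers that have them configured.
 */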
static int wg_open(struct net_device *dev)
{
	struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
	struct inet6_dev *dev_v6 = __in6_dev_get(dev);
	struct wg_device *wg = netdev_priv(dev);
	struct wg_peer *peer;
	int ret;

	if (dev_v4) {
		/* At some point we might put this check near the ip_rt_send_
		 * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
		 * to the current secpath check.
		 */
		IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
		IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
	}
	if (dev_v6)
		dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;

	mutex_lock(&wg->device_update_lock);
	ret = wg_socket_init(wg, wg->incoming_port);
	if (ret < 0)
		goto out;
	list_for_each_entry(peer, &wg->peer_list, peer_list) {
		wg_packet_send_staged_packets(peer);
		if (peer->persistent_keepalive_interval)
			wg_packet_send_keepalive(peer);
	}
out:
	mutex_unlock(&wg->device_update_lock);
	return ret;
}

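/* Before suspend or hibernation, clear handshake state and session keys on
 * every device, so that no key material survives in memory while the
 * machine sleeps; peers must handshake again on resume.
 */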
static int wg_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	/* If the machine is constantly suspending and resuming, as part of
	 * its normal operation rather than as a somewhat rare event, then we
	 * don't actually want to clear keys.
	 */
	if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) ||
	    IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP))
		return 0;

	if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
		return 0;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		mutex_lock(&wg->device_update_lock);
		list_for_each_entry(peer, &wg->peer_list, peer_list) {
			del_timer(&peer->timer_zero_key_material);
			wg_noise_handshake_clear(&peer->handshake);
			wg_noise_keypairs_clear(&peer->keypairs);
		}
		mutex_unlock(&wg->device_update_lock);
	}
	rtnl_unlock();
	rcu_barrier();
	return 0;
}

static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };

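/* When a VM fork is detected, expire every peer's current keypairs; a
 * forked clone could otherwise keep transmitting with the same keys and
 * nonce counters as its parent, which would be catastrophic for the AEAD.
 */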
static int wg_vm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		mutex_lock(&wg->device_update_lock);
		list_for_each_entry(peer, &wg->peer_list, peer_list)
			wg_noise_expire_current_peer_keypairs(peer);
		mutex_unlock(&wg->device_update_lock);
	}
	rtnl_unlock();
	return 0;
}

static struct notifier_block vm_notifier = { .notifier_call = wg_vm_notification };

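/* ndo_stop handler: purge staged packets, stop timers, and scrub all
 * handshake and key material for every peer, then drain the incoming
 * handshake queue and tear down the UDP socket.
 */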
static int wg_stop(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	struct wg_peer *peer;
	struct sk_buff *skb;

	mutex_lock(&wg->device_update_lock);
	list_for_each_entry(peer, &wg->peer_list, peer_list) {
		wg_packet_purge_staged_packets(peer);
		wg_timers_stop(peer);
		wg_noise_handshake_clear(&peer->handshake);
		wg_noise_keypairs_clear(&peer->keypairs);
		wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
	}
	mutex_unlock(&wg->device_update_lock);
	while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
		kfree_skb(skb);
	atomic_set(&wg->handshake_queue_len, 0);
	wg_socket_reinit(wg, NULL, NULL);
	return 0;
}

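/* ndo_start_xmit handler: look up the destination peer by allowed IPs,
 * segment GSO skbs, stage the resulting packets on the peer's queue, and
 * kick the encrypt/send path. Unroutable packets elicit ICMP(v6)
 * unreachable replies toward the sender.
 */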
static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	struct sk_buff_head packets;
	struct wg_peer *peer;
	struct sk_buff *next;
	sa_family_t family;
	u32 mtu;
	int ret;

	if (unlikely(!wg_check_packet_protocol(skb))) {
		ret = -EPROTONOSUPPORT;
		net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
		goto err;
	}

	peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
	if (unlikely(!peer)) {
		ret = -ENOKEY;
		if (skb->protocol == htons(ETH_P_IP))
			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
					    dev->name, &ip_hdr(skb)->daddr);
		else if (skb->protocol == htons(ETH_P_IPV6))
			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
					    dev->name, &ipv6_hdr(skb)->daddr);
		goto err_icmp;
	}

	family = READ_ONCE(peer->endpoint.addr.sa_family);
	if (unlikely(family != AF_INET && family != AF_INET6)) {
		ret = -EDESTADDRREQ;
		net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
				    dev->name, peer->internal_id);
		goto err_peer;
	}

	mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	__skb_queue_head_init(&packets);
	if (!skb_is_gso(skb)) {
		skb_mark_not_on_list(skb);
	} else {
		struct sk_buff *segs = skb_gso_segment(skb, 0);

		if (IS_ERR(segs)) {
			ret = PTR_ERR(segs);
			goto err_peer;
		}
		dev_kfree_skb(skb);
		skb = segs;
	}

	skb_list_walk_safe(skb, skb, next) {
		skb_mark_not_on_list(skb);

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (unlikely(!skb))
			continue;

		/* We only need to keep the original dst around for icmp,
		 * so at this point we're in a position to drop it.
		 */
		skb_dst_drop(skb);

		PACKET_CB(skb)->mtu = mtu;

		__skb_queue_tail(&packets, skb);
	}

	spin_lock_bh(&peer->staged_packet_queue.lock);
	/* If the queue is getting too big, we start removing the oldest packets
	 * until it's small again. We do this before adding the new packet, so
	 * we don't remove GSO segments that are in excess.
	 */
	while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
		dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
		DEV_STATS_INC(dev, tx_dropped);
	}
	skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);

	wg_packet_send_staged_packets(peer);

	wg_peer_put(peer);
	return NETDEV_TX_OK;

err_peer:
	wg_peer_put(peer);
err_icmp:
	if (skb->protocol == htons(ETH_P_IP))
		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
	else if (skb->protocol == htons(ETH_P_IPV6))
		icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
err:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
	return ret;
}

static const struct net_device_ops netdev_ops = {
	.ndo_open = wg_open,
	.ndo_stop = wg_stop,
	.ndo_start_xmit = wg_xmit,
};

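/* priv_destructor: runs after the netdev is unregistered. Tears down the
 * socket, peers, workqueues, and packet queues, then zeroes the static
 * identity key before freeing the netdev's memory.
 */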
static void wg_destruct(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);

	rtnl_lock();
	list_del(&wg->device_list);
	rtnl_unlock();
	mutex_lock(&wg->device_update_lock);
	rcu_assign_pointer(wg->creating_net, NULL);
	wg->incoming_port = 0;
	wg_socket_reinit(wg, NULL, NULL);
	/* The final references are cleared in the below calls to destroy_workqueue. */
	wg_peer_remove_all(wg);
	destroy_workqueue(wg->handshake_receive_wq);
	destroy_workqueue(wg->handshake_send_wq);
	destroy_workqueue(wg->packet_crypt_wq);
	wg_packet_queue_free(&wg->handshake_queue, true);
	wg_packet_queue_free(&wg->decrypt_queue, false);
	wg_packet_queue_free(&wg->encrypt_queue, false);
	rcu_barrier(); /* Wait for all the peers to be actually freed. */
	wg_ratelimiter_uninit();
	memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
	kvfree(wg->index_hashtable);
	kvfree(wg->peer_hashtable);
	mutex_unlock(&wg->device_update_lock);

	pr_debug("%s: Interface destroyed\n", dev->name);
	free_netdev(dev);
}

static const struct device_type device_type = { .name = KBUILD_MODNAME };

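/* Configure the netdev as a point-to-point, ARP-less layer 3 tunnel: no
 * link-layer header or address, software GSO and checksum offloads, and
 * default/maximum MTUs that account for the WireGuard message, UDP, and
 * outer IP header overhead.
 */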
static void wg_setup(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
				    NETIF_F_SG | NETIF_F_GSO |
				    NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
	const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
			     max(sizeof(struct ipv6hdr), sizeof(struct iphdr));

	dev->netdev_ops = &netdev_ops;
	dev->header_ops = &ip_tunnel_header_ops;
	dev->hard_header_len = 0;
	dev->addr_len = 0;
	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
	dev->type = ARPHRD_NONE;
	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->features |= NETIF_F_LLTX;
	dev->features |= WG_NETDEV_FEATURES;
	dev->hw_features |= WG_NETDEV_FEATURES;
	dev->hw_enc_features |= WG_NETDEV_FEATURES;
	dev->mtu = ETH_DATA_LEN - overhead;
	dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	SET_NETDEV_DEVTYPE(dev, &device_type);

	/* We need to keep the dst around in case of icmp replies. */
	netif_keep_dst(dev);

	memset(wg, 0, sizeof(*wg));
	wg->dev = dev;
}

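/* rtnl newlink handler: allocate the hashtables, workqueues, and packet
 * queues, initialize the ratelimiter, and register the netdev. Each error
 * label unwinds exactly what was set up before it, in reverse order.
 */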
static int wg_newlink(struct net *src_net, struct net_device *dev,
		      struct nlattr *tb[], struct nlattr *data[],
		      struct netlink_ext_ack *extack)
{
	struct wg_device *wg = netdev_priv(dev);
	int ret = -ENOMEM;

	rcu_assign_pointer(wg->creating_net, src_net);
	init_rwsem(&wg->static_identity.lock);
	mutex_init(&wg->socket_update_lock);
	mutex_init(&wg->device_update_lock);
	wg_allowedips_init(&wg->peer_allowedips);
	wg_cookie_checker_init(&wg->cookie_checker, wg);
	INIT_LIST_HEAD(&wg->peer_list);
	wg->device_update_gen = 1;

	wg->peer_hashtable = wg_pubkey_hashtable_alloc();
	if (!wg->peer_hashtable)
		return ret;

	wg->index_hashtable = wg_index_hashtable_alloc();
	if (!wg->index_hashtable)
		goto err_free_peer_hashtable;

	wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
			WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
	if (!wg->handshake_receive_wq)
		goto err_free_index_hashtable;

	wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
			WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
	if (!wg->handshake_send_wq)
		goto err_destroy_handshake_receive;

	wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
			WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
	if (!wg->packet_crypt_wq)
		goto err_destroy_handshake_send;

	ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
				   MAX_QUEUED_PACKETS);
	if (ret < 0)
		goto err_destroy_packet_crypt;

	ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
				   MAX_QUEUED_PACKETS);
	if (ret < 0)
		goto err_free_encrypt_queue;

	ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
				   MAX_QUEUED_INCOMING_HANDSHAKES);
	if (ret < 0)
		goto err_free_decrypt_queue;

	ret = wg_ratelimiter_init();
	if (ret < 0)
		goto err_free_handshake_queue;

	ret = register_netdevice(dev);
	if (ret < 0)
		goto err_uninit_ratelimiter;

	list_add(&wg->device_list, &device_list);

	/* We wait until the end to assign priv_destructor, so that
	 * register_netdevice doesn't call it for us if it fails.
	 */
	dev->priv_destructor = wg_destruct;

	pr_debug("%s: Interface created\n", dev->name);
	return ret;

err_uninit_ratelimiter:
	wg_ratelimiter_uninit();
err_free_handshake_queue:
	wg_packet_queue_free(&wg->handshake_queue, false);
err_free_decrypt_queue:
	wg_packet_queue_free(&wg->decrypt_queue, false);
err_free_encrypt_queue:
	wg_packet_queue_free(&wg->encrypt_queue, false);
err_destroy_packet_crypt:
	destroy_workqueue(wg->packet_crypt_wq);
err_destroy_handshake_send:
	destroy_workqueue(wg->handshake_send_wq);
err_destroy_handshake_receive:
	destroy_workqueue(wg->handshake_receive_wq);
err_free_index_hashtable:
	kvfree(wg->index_hashtable);
err_free_peer_hashtable:
	kvfree(wg->peer_hashtable);
	return ret;
}

static struct rtnl_link_ops link_ops __read_mostly = {
	.kind = KBUILD_MODNAME,
	.priv_size = sizeof(struct wg_device),
	.setup = wg_setup,
	.newlink = wg_newlink,
};

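/* When a network namespace that created a wg device exits, sever the
 * device's ties to it: take the carrier down, drop the creating_net
 * reference, close the socket, and forget cached endpoint source addresses.
 */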
static void wg_netns_pre_exit(struct net *net)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		if (rcu_access_pointer(wg->creating_net) == net) {
			pr_debug("%s: Creating namespace exiting\n", wg->dev->name);
			netif_carrier_off(wg->dev);
			mutex_lock(&wg->device_update_lock);
			rcu_assign_pointer(wg->creating_net, NULL);
			wg_socket_reinit(wg, NULL, NULL);
			list_for_each_entry(peer, &wg->peer_list, peer_list)
				wg_socket_clear_peer_endpoint_src(peer);
			mutex_unlock(&wg->device_update_lock);
		}
	}
	rtnl_unlock();
}

static struct pernet_operations pernet_ops = {
	.pre_exit = wg_netns_pre_exit
};

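/* Module init for the device layer: register the PM and vmfork notifiers,
 * the pernet hooks, and the rtnl link ops, unwinding in reverse on failure.
 */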
int __init wg_device_init(void)
{
	int ret;

	ret = register_pm_notifier(&pm_notifier);
	if (ret)
		return ret;

	ret = register_random_vmfork_notifier(&vm_notifier);
	if (ret)
		goto error_pm;

	ret = register_pernet_device(&pernet_ops);
	if (ret)
		goto error_vm;

	ret = rtnl_link_register(&link_ops);
	if (ret)
		goto error_pernet;

	return 0;

error_pernet:
	unregister_pernet_device(&pernet_ops);
error_vm:
	unregister_random_vmfork_notifier(&vm_notifier);
error_pm:
	unregister_pm_notifier(&pm_notifier);
	return ret;
}

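/* Module exit counterpart: unregister everything in reverse order, then
 * wait for outstanding RCU callbacks to finish before the module goes away.
 */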
void wg_device_uninit(void)
{
	rtnl_link_unregister(&link_ops);
	unregister_pernet_device(&pernet_ops);
	unregister_random_vmfork_notifier(&vm_notifier);
	unregister_pm_notifier(&pm_notifier);
	rcu_barrier();
}