// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "timers.h"
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "messages.h"
#include "cookie.h"

#include <linux/uio.h>
#include <linux/inetdevice.h>
#include <linux/socket.h>
#include <net/ip_tunnels.h>
#include <net/udp.h>
#include <net/sock.h>
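
/* Build and send a handshake initiation message to this peer. The call is a
 * no-op if an initiation was already sent within the last REKEY_TIMEOUT
 * seconds, which rate limits handshakes.
 */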
static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
{
	struct message_handshake_initiation packet;

	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
				      REKEY_TIMEOUT))
		return; /* This function is rate limited. */

	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
	net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
			    peer->device->dev->name, peer->internal_id,
			    &peer->endpoint.addr);

	if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
		wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
		wg_timers_any_authenticated_packet_traversal(peer);
		wg_timers_any_authenticated_packet_sent(peer);
		atomic64_set(&peer->last_sent_handshake,
			     ktime_get_coarse_boottime_ns());
		wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
					      HANDSHAKE_DSCP);
		wg_timers_handshake_initiated(peer);
	}
}
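
/* Workqueue callback queued by wg_packet_send_queued_handshake_initiation():
 * sends the initiation and drops the peer reference taken when the work was
 * queued.
 */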
void wg_packet_handshake_send_worker(struct work_struct *work)
{
	struct wg_peer *peer = container_of(work, struct wg_peer,
					    transmit_handshake_work);

	wg_packet_send_handshake_initiation(peer);
	wg_peer_put(peer);
}
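
/* Schedule a handshake initiation on the device's handshake_send_wq, unless
 * one was already sent within REKEY_TIMEOUT or the peer has been removed.
 * When is_retry is false, the retry counter is reset first.
 */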
void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
						bool is_retry)
{
	if (!is_retry)
		peer->timer_handshake_attempts = 0;

	rcu_read_lock_bh();
	/* We check last_sent_handshake here in addition to the actual function
	 * we're queueing up, so that we don't queue things if not strictly
	 * necessary:
	 */
	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
				      REKEY_TIMEOUT) ||
	    unlikely(READ_ONCE(peer->is_dead)))
		goto out;

	wg_peer_get(peer);
	/* Queues up wg_packet_handshake_send_worker(), which does a
	 * wg_peer_put(peer) once the handshake has been sent:
	 */
	if (!queue_work(peer->device->handshake_send_wq,
			&peer->transmit_handshake_work))
		/* If the work was already queued, we want to drop the
		 * extra reference:
		 */
		wg_peer_put(peer);
out:
	rcu_read_unlock_bh();
}
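
/* Build and send a handshake response. On success the handshake is complete
 * from our side, so the new session keypair is derived immediately.
 */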
void wg_packet_send_handshake_response(struct wg_peer *peer)
{
	struct message_handshake_response packet;

	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
	net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
			    peer->device->dev->name, peer->internal_id,
			    &peer->endpoint.addr);

	if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
		wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
		if (wg_noise_handshake_begin_session(&peer->handshake,
						     &peer->keypairs)) {
			wg_timers_session_derived(peer);
			wg_timers_any_authenticated_packet_traversal(peer);
			wg_timers_any_authenticated_packet_sent(peer);
			atomic64_set(&peer->last_sent_handshake,
				     ktime_get_coarse_boottime_ns());
			wg_socket_send_buffer_to_peer(peer, &packet,
						      sizeof(packet),
						      HANDSHAKE_DSCP);
		}
	}
}
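
/* Reply to a handshake message that was not processed (e.g. under load) with
 * a cookie message, so the sender can prove ownership of its claimed source
 * address before we expend further handshake effort.
 */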
void wg_packet_send_handshake_cookie(struct wg_device *wg,
				     struct sk_buff *initiating_skb,
				     __le32 sender_index)
{
	struct message_handshake_cookie packet;

	net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
				wg->dev->name, initiating_skb);
	wg_cookie_message_create(&packet, initiating_skb, sender_index,
				 &wg->cookie_checker);
	wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
					      sizeof(packet));
}
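
/* Initiate a new handshake if the current sending key is nearing the end of
 * its life: after REKEY_AFTER_MESSAGES uses of the counter, or, when we were
 * the initiator, after REKEY_AFTER_TIME seconds.
 */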
static void keep_key_fresh(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	bool send;

	rcu_read_lock_bh();
	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
	send = keypair && READ_ONCE(keypair->sending.is_valid) &&
	       (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES ||
		(keypair->i_am_the_initiator &&
		 wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
	rcu_read_unlock_bh();

	if (unlikely(send))
		wg_packet_send_queued_handshake_initiation(peer, false);
}
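
/* Compute how many zero bytes of padding must be appended so that the
 * plaintext length is a multiple of MESSAGE_PADDING_MULTIPLE, without growing
 * the packet beyond the MTU recorded in its control block.
 */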
static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
	unsigned int padded_size, last_unit = skb->len;

	if (unlikely(!PACKET_CB(skb)->mtu))
		return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;

	/* We do this modulo business with the MTU, just in case the networking
	 * layer gives us a packet that's bigger than the MTU. In that case, we
	 * wouldn't want the final subtraction to overflow in the case of the
	 * padded_size being clamped. Fortunately, that's very rarely the case,
	 * so we optimize for that not happening.
	 */
	if (unlikely(last_unit > PACKET_CB(skb)->mtu))
		last_unit %= PACKET_CB(skb)->mtu;

	padded_size = min(PACKET_CB(skb)->mtu,
			  ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
	return padded_size - last_unit;
}
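
/* Encrypt one skb in place: pad it, prepend the data message header, append
 * room for the authentication tag, and seal everything after the header with
 * ChaCha20-Poly1305 using the keypair's sending key and the nonce previously
 * stored in the packet control block. Returns false if the skb could not be
 * prepared.
 */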
static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
{
	unsigned int padding_len, plaintext_len, trailer_len;
	struct scatterlist sg[MAX_SKB_FRAGS + 8];
	struct message_data *header;
	struct sk_buff *trailer;
	int num_frags;

	/* Force hash calculation before encryption so that flow analysis is
	 * consistent over the inner packet.
	 */
	skb_get_hash(skb);

	/* Calculate lengths. */
	padding_len = calculate_skb_padding(skb);
	trailer_len = padding_len + noise_encrypted_len(0);
	plaintext_len = skb->len + padding_len;

	/* Expand data section to have room for padding and auth tag. */
	num_frags = skb_cow_data(skb, trailer_len, &trailer);
	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
		return false;

	/* Set the padding to zeros, and make sure it and the auth tag are part
	 * of the skb.
	 */
	memset(skb_tail_pointer(trailer), 0, padding_len);

	/* Expand head section to have room for our header and the network
	 * stack's headers.
	 */
	if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
		return false;

	/* Finalize checksum calculation for the inner packet, if required. */
	if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb)))
		return false;

	/* Only after checksumming can we safely add on the padding at the end
	 * and the header.
	 */
	skb_set_inner_network_header(skb, 0);
	header = (struct message_data *)skb_push(skb, sizeof(*header));
	header->header.type = cpu_to_le32(MESSAGE_DATA);
	header->key_idx = keypair->remote_index;
	header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
	pskb_put(skb, trailer, trailer_len);

	/* Now we can encrypt the scattergather segments */
	sg_init_table(sg, num_frags);
	if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
			 noise_encrypted_len(plaintext_len)) <= 0)
		return false;
	return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
						   PACKET_CB(skb)->nonce,
						   keypair->sending.key);
}
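
/* Send a keepalive to the peer: if nothing is already staged, stage a
 * zero-length packet so that something authenticated goes out, then flush the
 * staged queue as usual.
 */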
void wg_packet_send_keepalive(struct wg_peer *peer)
{
	struct sk_buff *skb;

	if (skb_queue_empty(&peer->staged_packet_queue)) {
		skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
				GFP_ATOMIC);
		if (unlikely(!skb))
			return;
		skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
		skb->dev = peer->device->dev;
		PACKET_CB(skb)->mtu = skb->dev->mtu;
		skb_queue_tail(&peer->staged_packet_queue, skb);
		net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
				    peer->device->dev->name, peer->internal_id,
				    &peer->endpoint.addr);
	}

	wg_packet_send_staged_packets(peer);
}
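
/* Called once a list of skbs has been encrypted: transmit each one to the
 * peer's endpoint, update the authenticated-traffic timers, and kick off a
 * rekey if the sending key is getting old.
 */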
static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first)
{
	struct sk_buff *skb, *next;
	bool is_keepalive, data_sent = false;

	wg_timers_any_authenticated_packet_traversal(peer);
	wg_timers_any_authenticated_packet_sent(peer);
	skb_list_walk_safe(first, skb, next) {
		is_keepalive = skb->len == message_data_len(0);
		if (likely(!wg_socket_send_skb_to_peer(peer, skb,
				PACKET_CB(skb)->ds) && !is_keepalive))
			data_sent = true;
	}

	if (likely(data_sent))
		wg_timers_data_sent(peer);

	keep_key_fresh(peer);
}
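
/* Per-peer transmit worker: drain the tx_queue in order, handing lists that
 * were successfully encrypted to wg_packet_create_data_done() and freeing
 * lists that were marked dead, dropping the references taken at enqueue time.
 */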
void wg_packet_tx_worker(struct work_struct *work)
{
	struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work);
	struct noise_keypair *keypair;
	enum packet_state state;
	struct sk_buff *first;

	while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL &&
	       (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
		       PACKET_STATE_UNCRYPTED) {
		wg_prev_queue_drop_peeked(&peer->tx_queue);
		keypair = PACKET_CB(first)->keypair;

		if (likely(state == PACKET_STATE_CRYPTED))
			wg_packet_create_data_done(peer, first);
		else
			kfree_skb_list(first);

		wg_noise_keypair_put(keypair, false);
		wg_peer_put(peer);
		if (need_resched())
			cond_resched();
	}
}
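
/* Per-CPU encryption worker: pull skb lists off the shared encryption ring,
 * encrypt every skb in each list, and mark the list as crypted or dead in the
 * owning peer's tx_queue.
 */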
void wg_packet_encrypt_worker(struct work_struct *work)
{
	struct crypt_queue *queue = container_of(work, struct multicore_worker,
						 work)->ptr;
	struct sk_buff *first, *skb, *next;

	while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
		enum packet_state state = PACKET_STATE_CRYPTED;

		skb_list_walk_safe(first, skb, next) {
			if (likely(encrypt_packet(skb,
					PACKET_CB(first)->keypair))) {
				wg_reset_packet(skb, true);
			} else {
				state = PACKET_STATE_DEAD;
				break;
			}
		}
		wg_queue_enqueue_per_peer_tx(first, state);
		if (need_resched())
			cond_resched();
	}
}
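
/* Hand an skb list to the parallel encryption queue and the peer's serial
 * tx_queue. On failure, release the keypair and peer references taken by the
 * caller and free the list.
 */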
static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
{
	struct wg_device *wg = peer->device;
	int ret = -EINVAL;

	rcu_read_lock_bh();
	if (unlikely(READ_ONCE(peer->is_dead)))
		goto err;

	ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
						   wg->packet_crypt_wq);
	if (unlikely(ret == -EPIPE))
		wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
err:
	rcu_read_unlock_bh();
	if (likely(!ret || ret == -EPIPE))
		return;
	wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
	wg_peer_put(peer);
	kfree_skb_list(first);
}
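
/* Drop every packet waiting in the staged queue, accounting for them as
 * tx_dropped on the device.
 */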
void wg_packet_purge_staged_packets(struct wg_peer *peer)
{
	spin_lock_bh(&peer->staged_packet_queue.lock);
	DEV_STATS_ADD(peer->device->dev, tx_dropped,
		      peer->staged_packet_queue.qlen);
	__skb_queue_purge(&peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);
}
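
/* Take ownership of the staged packet queue, assign nonces under the current
 * keypair, and submit the whole batch for encryption. If no valid sending key
 * is available, the packets are put back and a handshake is initiated.
 */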
void wg_packet_send_staged_packets(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	struct sk_buff_head packets;
	struct sk_buff *skb;

	/* Steal the current queue into our local one. */
	__skb_queue_head_init(&packets);
	spin_lock_bh(&peer->staged_packet_queue.lock);
	skb_queue_splice_init(&peer->staged_packet_queue, &packets);
	spin_unlock_bh(&peer->staged_packet_queue.lock);
	if (unlikely(skb_queue_empty(&packets)))
		return;

	/* First we make sure we have a valid reference to a valid key. */
	rcu_read_lock_bh();
	keypair = wg_noise_keypair_get(
		rcu_dereference_bh(peer->keypairs.current_keypair));
	rcu_read_unlock_bh();
	if (unlikely(!keypair))
		goto out_nokey;
	if (unlikely(!READ_ONCE(keypair->sending.is_valid)))
		goto out_nokey;
	if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
					      REJECT_AFTER_TIME)))
		goto out_invalid;

	/* After we know we have a somewhat valid key, we now try to assign
	 * nonces to all of the packets in the queue. If we can't assign nonces
	 * for all of them, we just consider it a failure and wait for the next
	 * handshake.
	 */
	skb_queue_walk(&packets, skb) {
		/* 0 for no outer TOS: no leak. TODO: at some later point, we
		 * might consider using flowi->tos as outer instead.
		 */
		PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
		PACKET_CB(skb)->nonce =
			atomic64_inc_return(&keypair->sending_counter) - 1;
		if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
			goto out_invalid;
	}

	packets.prev->next = NULL;
	wg_peer_get(keypair->entry.peer);
	PACKET_CB(packets.next)->keypair = keypair;
	wg_packet_create_data(peer, packets.next);
	return;

out_invalid:
	WRITE_ONCE(keypair->sending.is_valid, false);
out_nokey:
	wg_noise_keypair_put(keypair, false);

	/* We orphan the packets if we're waiting on a handshake, so that they
	 * don't block a socket's pool.
	 */
	skb_queue_walk(&packets, skb)
		skb_orphan(skb);
	/* Then we put them back on the top of the queue. We're not too
	 * concerned about accidentally getting things a little out of order if
	 * packets are being added really fast, because this queue is for before
	 * packets can even be sent and it's small anyway.
	 */
	spin_lock_bh(&peer->staged_packet_queue.lock);
	skb_queue_splice(&packets, &peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);

	/* If we're exiting because there's something wrong with the key, it
	 * means we should initiate a new handshake.
	 */
	wg_packet_send_queued_handshake_initiation(peer, false);
}