1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * common code for virtio vsock |
4 | * |
5 | * Copyright (C) 2013-2015 Red Hat, Inc. |
6 | * Author: Asias He <asias@redhat.com> |
7 | * Stefan Hajnoczi <stefanha@redhat.com> |
8 | */ |
9 | #include <linux/spinlock.h> |
10 | #include <linux/module.h> |
11 | #include <linux/sched/signal.h> |
12 | #include <linux/ctype.h> |
13 | #include <linux/list.h> |
14 | #include <linux/virtio_vsock.h> |
15 | #include <uapi/linux/vsockmon.h> |
16 | |
17 | #include <net/sock.h> |
18 | #include <net/af_vsock.h> |
19 | |
20 | #define CREATE_TRACE_POINTS |
21 | #include <trace/events/vsock_virtio_transport_common.h> |
22 | |
/* Time allowed for a connection to shut down gracefully (8 * HZ) */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Size threshold used to detect packets small enough to be copied */
#define GOOD_COPY_LEN  128
28 | |
29 | static const struct virtio_transport * |
30 | virtio_transport_get_ops(struct vsock_sock *vsk) |
31 | { |
32 | const struct vsock_transport *t = vsock_core_get_transport(vsk); |
33 | |
34 | if (WARN_ON(!t)) |
35 | return NULL; |
36 | |
37 | return container_of(t, struct virtio_transport, transport); |
38 | } |
39 | |
40 | static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, |
41 | struct virtio_vsock_pkt_info *info, |
42 | size_t pkt_len) |
43 | { |
44 | struct iov_iter *iov_iter; |
45 | |
46 | if (!info->msg) |
47 | return false; |
48 | |
49 | iov_iter = &info->msg->msg_iter; |
50 | |
51 | if (iov_iter->iov_offset) |
52 | return false; |
53 | |
54 | /* We can't send whole iov. */ |
55 | if (iov_iter->count > pkt_len) |
56 | return false; |
57 | |
58 | /* Check that transport can send data in zerocopy mode. */ |
59 | t_ops = virtio_transport_get_ops(vsk: info->vsk); |
60 | |
61 | if (t_ops->can_msgzerocopy) { |
62 | int pages_to_send = iov_iter_npages(i: iov_iter, MAX_SKB_FRAGS); |
63 | |
64 | /* +1 is for packet header. */ |
65 | return t_ops->can_msgzerocopy(pages_to_send + 1); |
66 | } |
67 | |
68 | return true; |
69 | } |
70 | |
71 | static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, |
72 | struct sk_buff *skb, |
73 | struct msghdr *msg, |
74 | bool zerocopy) |
75 | { |
76 | struct ubuf_info *uarg; |
77 | |
78 | if (msg->msg_ubuf) { |
79 | uarg = msg->msg_ubuf; |
80 | net_zcopy_get(uarg); |
81 | } else { |
82 | struct iov_iter *iter = &msg->msg_iter; |
83 | struct ubuf_info_msgzc *uarg_zc; |
84 | |
85 | uarg = msg_zerocopy_realloc(sk_vsock(vsk), |
86 | size: iter->count, |
87 | NULL); |
88 | if (!uarg) |
89 | return -1; |
90 | |
91 | uarg_zc = uarg_to_msgzc(uarg); |
92 | uarg_zc->zerocopy = zerocopy ? 1 : 0; |
93 | } |
94 | |
95 | skb_zcopy_init(skb, uarg); |
96 | |
97 | return 0; |
98 | } |
99 | |
100 | static int virtio_transport_fill_skb(struct sk_buff *skb, |
101 | struct virtio_vsock_pkt_info *info, |
102 | size_t len, |
103 | bool zcopy) |
104 | { |
105 | if (zcopy) |
106 | return __zerocopy_sg_from_iter(msg: info->msg, NULL, skb, |
107 | from: &info->msg->msg_iter, |
108 | length: len); |
109 | |
110 | return memcpy_from_msg(data: skb_put(skb, len), msg: info->msg, len); |
111 | } |
112 | |
113 | static void virtio_transport_init_hdr(struct sk_buff *skb, |
114 | struct virtio_vsock_pkt_info *info, |
115 | size_t payload_len, |
116 | u32 src_cid, |
117 | u32 src_port, |
118 | u32 dst_cid, |
119 | u32 dst_port) |
120 | { |
121 | struct virtio_vsock_hdr *hdr; |
122 | |
123 | hdr = virtio_vsock_hdr(skb); |
124 | hdr->type = cpu_to_le16(info->type); |
125 | hdr->op = cpu_to_le16(info->op); |
126 | hdr->src_cid = cpu_to_le64(src_cid); |
127 | hdr->dst_cid = cpu_to_le64(dst_cid); |
128 | hdr->src_port = cpu_to_le32(src_port); |
129 | hdr->dst_port = cpu_to_le32(dst_port); |
130 | hdr->flags = cpu_to_le32(info->flags); |
131 | hdr->len = cpu_to_le32(payload_len); |
132 | hdr->buf_alloc = cpu_to_le32(0); |
133 | hdr->fwd_cnt = cpu_to_le32(0); |
134 | } |
135 | |
136 | static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, |
137 | void *dst, |
138 | size_t len) |
139 | { |
140 | struct iov_iter iov_iter = { 0 }; |
141 | struct kvec kvec; |
142 | size_t to_copy; |
143 | |
144 | kvec.iov_base = dst; |
145 | kvec.iov_len = len; |
146 | |
147 | iov_iter.iter_type = ITER_KVEC; |
148 | iov_iter.kvec = &kvec; |
149 | iov_iter.nr_segs = 1; |
150 | |
151 | to_copy = min_t(size_t, len, skb->len); |
152 | |
153 | skb_copy_datagram_iter(from: skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, |
154 | to: &iov_iter, size: to_copy); |
155 | } |
156 | |
157 | /* Packet capture */ |
158 | static struct sk_buff *virtio_transport_build_skb(void *opaque) |
159 | { |
160 | struct virtio_vsock_hdr *pkt_hdr; |
161 | struct sk_buff *pkt = opaque; |
162 | struct af_vsockmon_hdr *hdr; |
163 | struct sk_buff *skb; |
164 | size_t payload_len; |
165 | |
166 | /* A packet could be split to fit the RX buffer, so we can retrieve |
167 | * the payload length from the header and the buffer pointer taking |
168 | * care of the offset in the original packet. |
169 | */ |
170 | pkt_hdr = virtio_vsock_hdr(skb: pkt); |
171 | payload_len = pkt->len; |
172 | |
173 | skb = alloc_skb(size: sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, |
174 | GFP_ATOMIC); |
175 | if (!skb) |
176 | return NULL; |
177 | |
178 | hdr = skb_put(skb, len: sizeof(*hdr)); |
179 | |
180 | /* pkt->hdr is little-endian so no need to byteswap here */ |
181 | hdr->src_cid = pkt_hdr->src_cid; |
182 | hdr->src_port = pkt_hdr->src_port; |
183 | hdr->dst_cid = pkt_hdr->dst_cid; |
184 | hdr->dst_port = pkt_hdr->dst_port; |
185 | |
186 | hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); |
187 | hdr->len = cpu_to_le16(sizeof(*pkt_hdr)); |
188 | memset(hdr->reserved, 0, sizeof(hdr->reserved)); |
189 | |
190 | switch (le16_to_cpu(pkt_hdr->op)) { |
191 | case VIRTIO_VSOCK_OP_REQUEST: |
192 | case VIRTIO_VSOCK_OP_RESPONSE: |
193 | hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); |
194 | break; |
195 | case VIRTIO_VSOCK_OP_RST: |
196 | case VIRTIO_VSOCK_OP_SHUTDOWN: |
197 | hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT); |
198 | break; |
199 | case VIRTIO_VSOCK_OP_RW: |
200 | hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD); |
201 | break; |
202 | case VIRTIO_VSOCK_OP_CREDIT_UPDATE: |
203 | case VIRTIO_VSOCK_OP_CREDIT_REQUEST: |
204 | hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); |
205 | break; |
206 | default: |
207 | hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN); |
208 | break; |
209 | } |
210 | |
211 | skb_put_data(skb, data: pkt_hdr, len: sizeof(*pkt_hdr)); |
212 | |
213 | if (payload_len) { |
214 | if (skb_is_nonlinear(skb: pkt)) { |
215 | void *data = skb_put(skb, len: payload_len); |
216 | |
217 | virtio_transport_copy_nonlinear_skb(skb: pkt, dst: data, len: payload_len); |
218 | } else { |
219 | skb_put_data(skb, data: pkt->data, len: payload_len); |
220 | } |
221 | } |
222 | |
223 | return skb; |
224 | } |
225 | |
226 | void virtio_transport_deliver_tap_pkt(struct sk_buff *skb) |
227 | { |
228 | if (virtio_vsock_skb_tap_delivered(skb)) |
229 | return; |
230 | |
231 | vsock_deliver_tap(build_skb: virtio_transport_build_skb, opaque: skb); |
232 | virtio_vsock_skb_set_tap_delivered(skb); |
233 | } |
234 | EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); |
235 | |
236 | static u16 virtio_transport_get_type(struct sock *sk) |
237 | { |
238 | if (sk->sk_type == SOCK_STREAM) |
239 | return VIRTIO_VSOCK_TYPE_STREAM; |
240 | else |
241 | return VIRTIO_VSOCK_TYPE_SEQPACKET; |
242 | } |
243 | |
244 | /* Returns new sk_buff on success, otherwise returns NULL. */ |
245 | static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, |
246 | size_t payload_len, |
247 | bool zcopy, |
248 | u32 src_cid, |
249 | u32 src_port, |
250 | u32 dst_cid, |
251 | u32 dst_port) |
252 | { |
253 | struct vsock_sock *vsk; |
254 | struct sk_buff *skb; |
255 | size_t skb_len; |
256 | |
257 | skb_len = VIRTIO_VSOCK_SKB_HEADROOM; |
258 | |
259 | if (!zcopy) |
260 | skb_len += payload_len; |
261 | |
262 | skb = virtio_vsock_alloc_skb(size: skb_len, GFP_KERNEL); |
263 | if (!skb) |
264 | return NULL; |
265 | |
266 | virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, |
267 | dst_cid, dst_port); |
268 | |
269 | vsk = info->vsk; |
270 | |
271 | /* If 'vsk' != NULL then payload is always present, so we |
272 | * will never call '__zerocopy_sg_from_iter()' below without |
273 | * setting skb owner in 'skb_set_owner_w()'. The only case |
274 | * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message |
275 | * without payload. |
276 | */ |
277 | WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); |
278 | |
279 | /* Set owner here, because '__zerocopy_sg_from_iter()' uses |
280 | * owner of skb without check to update 'sk_wmem_alloc'. |
281 | */ |
282 | if (vsk) |
283 | skb_set_owner_w(skb, sk_vsock(vsk)); |
284 | |
285 | if (info->msg && payload_len > 0) { |
286 | int err; |
287 | |
288 | err = virtio_transport_fill_skb(skb, info, len: payload_len, zcopy); |
289 | if (err) |
290 | goto out; |
291 | |
292 | if (msg_data_left(msg: info->msg) == 0 && |
293 | info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { |
294 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
295 | |
296 | hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); |
297 | |
298 | if (info->msg->msg_flags & MSG_EOR) |
299 | hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); |
300 | } |
301 | } |
302 | |
303 | if (info->reply) |
304 | virtio_vsock_skb_set_reply(skb); |
305 | |
306 | trace_virtio_transport_alloc_pkt(src_cid, src_port, |
307 | dst_cid, dst_port, |
308 | len: payload_len, |
309 | type: info->type, |
310 | op: info->op, |
311 | flags: info->flags, |
312 | zcopy); |
313 | |
314 | return skb; |
315 | out: |
316 | kfree_skb(skb); |
317 | return NULL; |
318 | } |
319 | |
320 | /* This function can only be used on connecting/connected sockets, |
321 | * since a socket assigned to a transport is required. |
322 | * |
323 | * Do not use on listener sockets! |
324 | */ |
325 | static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, |
326 | struct virtio_vsock_pkt_info *info) |
327 | { |
328 | u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; |
329 | u32 src_cid, src_port, dst_cid, dst_port; |
330 | const struct virtio_transport *t_ops; |
331 | struct virtio_vsock_sock *vvs; |
332 | u32 pkt_len = info->pkt_len; |
333 | bool can_zcopy = false; |
334 | u32 rest_len; |
335 | int ret; |
336 | |
337 | info->type = virtio_transport_get_type(sk_vsock(vsk)); |
338 | |
339 | t_ops = virtio_transport_get_ops(vsk); |
340 | if (unlikely(!t_ops)) |
341 | return -EFAULT; |
342 | |
343 | src_cid = t_ops->transport.get_local_cid(); |
344 | src_port = vsk->local_addr.svm_port; |
345 | if (!info->remote_cid) { |
346 | dst_cid = vsk->remote_addr.svm_cid; |
347 | dst_port = vsk->remote_addr.svm_port; |
348 | } else { |
349 | dst_cid = info->remote_cid; |
350 | dst_port = info->remote_port; |
351 | } |
352 | |
353 | vvs = vsk->trans; |
354 | |
355 | /* virtio_transport_get_credit might return less than pkt_len credit */ |
356 | pkt_len = virtio_transport_get_credit(vvs, wanted: pkt_len); |
357 | |
358 | /* Do not send zero length OP_RW pkt */ |
359 | if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) |
360 | return pkt_len; |
361 | |
362 | if (info->msg) { |
363 | /* If zerocopy is not enabled by 'setsockopt()', we behave as |
364 | * there is no MSG_ZEROCOPY flag set. |
365 | */ |
366 | if (!sock_flag(sk_vsock(vsk), flag: SOCK_ZEROCOPY)) |
367 | info->msg->msg_flags &= ~MSG_ZEROCOPY; |
368 | |
369 | if (info->msg->msg_flags & MSG_ZEROCOPY) |
370 | can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); |
371 | |
372 | if (can_zcopy) |
373 | max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, |
374 | (MAX_SKB_FRAGS * PAGE_SIZE)); |
375 | } |
376 | |
377 | rest_len = pkt_len; |
378 | |
379 | do { |
380 | struct sk_buff *skb; |
381 | size_t skb_len; |
382 | |
383 | skb_len = min(max_skb_len, rest_len); |
384 | |
385 | skb = virtio_transport_alloc_skb(info, payload_len: skb_len, zcopy: can_zcopy, |
386 | src_cid, src_port, |
387 | dst_cid, dst_port); |
388 | if (!skb) { |
389 | ret = -ENOMEM; |
390 | break; |
391 | } |
392 | |
393 | /* We process buffer part by part, allocating skb on |
394 | * each iteration. If this is last skb for this buffer |
395 | * and MSG_ZEROCOPY mode is in use - we must allocate |
396 | * completion for the current syscall. |
397 | */ |
398 | if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && |
399 | skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { |
400 | if (virtio_transport_init_zcopy_skb(vsk, skb, |
401 | msg: info->msg, |
402 | zerocopy: can_zcopy)) { |
403 | ret = -ENOMEM; |
404 | break; |
405 | } |
406 | } |
407 | |
408 | virtio_transport_inc_tx_pkt(vvs, skb); |
409 | |
410 | ret = t_ops->send_pkt(skb); |
411 | if (ret < 0) |
412 | break; |
413 | |
414 | /* Both virtio and vhost 'send_pkt()' returns 'skb_len', |
415 | * but for reliability use 'ret' instead of 'skb_len'. |
416 | * Also if partial send happens (e.g. 'ret' != 'skb_len') |
417 | * somehow, we break this loop, but account such returned |
418 | * value in 'virtio_transport_put_credit()'. |
419 | */ |
420 | rest_len -= ret; |
421 | |
422 | if (WARN_ONCE(ret != skb_len, |
423 | "'send_pkt()' returns %i, but %zu expected\n" , |
424 | ret, skb_len)) |
425 | break; |
426 | } while (rest_len); |
427 | |
428 | virtio_transport_put_credit(vvs, credit: rest_len); |
429 | |
430 | /* Return number of bytes, if any data has been sent. */ |
431 | if (rest_len != pkt_len) |
432 | ret = pkt_len - rest_len; |
433 | |
434 | return ret; |
435 | } |
436 | |
437 | static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, |
438 | u32 len) |
439 | { |
440 | if (vvs->rx_bytes + len > vvs->buf_alloc) |
441 | return false; |
442 | |
443 | vvs->rx_bytes += len; |
444 | return true; |
445 | } |
446 | |
447 | static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, |
448 | u32 len) |
449 | { |
450 | vvs->rx_bytes -= len; |
451 | vvs->fwd_cnt += len; |
452 | } |
453 | |
454 | void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) |
455 | { |
456 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
457 | |
458 | spin_lock_bh(lock: &vvs->rx_lock); |
459 | vvs->last_fwd_cnt = vvs->fwd_cnt; |
460 | hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt); |
461 | hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc); |
462 | spin_unlock_bh(lock: &vvs->rx_lock); |
463 | } |
464 | EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); |
465 | |
466 | u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) |
467 | { |
468 | u32 ret; |
469 | |
470 | if (!credit) |
471 | return 0; |
472 | |
473 | spin_lock_bh(lock: &vvs->tx_lock); |
474 | ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); |
475 | if (ret > credit) |
476 | ret = credit; |
477 | vvs->tx_cnt += ret; |
478 | spin_unlock_bh(lock: &vvs->tx_lock); |
479 | |
480 | return ret; |
481 | } |
482 | EXPORT_SYMBOL_GPL(virtio_transport_get_credit); |
483 | |
484 | void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) |
485 | { |
486 | if (!credit) |
487 | return; |
488 | |
489 | spin_lock_bh(lock: &vvs->tx_lock); |
490 | vvs->tx_cnt -= credit; |
491 | spin_unlock_bh(lock: &vvs->tx_lock); |
492 | } |
493 | EXPORT_SYMBOL_GPL(virtio_transport_put_credit); |
494 | |
495 | static int virtio_transport_send_credit_update(struct vsock_sock *vsk) |
496 | { |
497 | struct virtio_vsock_pkt_info info = { |
498 | .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, |
499 | .vsk = vsk, |
500 | }; |
501 | |
502 | return virtio_transport_send_pkt_info(vsk, info: &info); |
503 | } |
504 | |
505 | static ssize_t |
506 | virtio_transport_stream_do_peek(struct vsock_sock *vsk, |
507 | struct msghdr *msg, |
508 | size_t len) |
509 | { |
510 | struct virtio_vsock_sock *vvs = vsk->trans; |
511 | struct sk_buff *skb; |
512 | size_t total = 0; |
513 | int err; |
514 | |
515 | spin_lock_bh(lock: &vvs->rx_lock); |
516 | |
517 | skb_queue_walk(&vvs->rx_queue, skb) { |
518 | size_t bytes; |
519 | |
520 | bytes = len - total; |
521 | if (bytes > skb->len) |
522 | bytes = skb->len; |
523 | |
524 | spin_unlock_bh(lock: &vvs->rx_lock); |
525 | |
526 | /* sk_lock is held by caller so no one else can dequeue. |
527 | * Unlock rx_lock since skb_copy_datagram_iter() may sleep. |
528 | */ |
529 | err = skb_copy_datagram_iter(from: skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, |
530 | to: &msg->msg_iter, size: bytes); |
531 | if (err) |
532 | goto out; |
533 | |
534 | total += bytes; |
535 | |
536 | spin_lock_bh(lock: &vvs->rx_lock); |
537 | |
538 | if (total == len) |
539 | break; |
540 | } |
541 | |
542 | spin_unlock_bh(lock: &vvs->rx_lock); |
543 | |
544 | return total; |
545 | |
546 | out: |
547 | if (total) |
548 | err = total; |
549 | return err; |
550 | } |
551 | |
552 | static ssize_t |
553 | virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, |
554 | struct msghdr *msg, |
555 | size_t len) |
556 | { |
557 | struct virtio_vsock_sock *vvs = vsk->trans; |
558 | size_t bytes, total = 0; |
559 | struct sk_buff *skb; |
560 | u32 fwd_cnt_delta; |
561 | bool low_rx_bytes; |
562 | int err = -EFAULT; |
563 | u32 free_space; |
564 | |
565 | spin_lock_bh(lock: &vvs->rx_lock); |
566 | |
567 | if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes, |
568 | "rx_queue is empty, but rx_bytes is non-zero\n" )) { |
569 | spin_unlock_bh(lock: &vvs->rx_lock); |
570 | return err; |
571 | } |
572 | |
573 | while (total < len && !skb_queue_empty(list: &vvs->rx_queue)) { |
574 | skb = skb_peek(list_: &vvs->rx_queue); |
575 | |
576 | bytes = min_t(size_t, len - total, |
577 | skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); |
578 | |
579 | /* sk_lock is held by caller so no one else can dequeue. |
580 | * Unlock rx_lock since skb_copy_datagram_iter() may sleep. |
581 | */ |
582 | spin_unlock_bh(lock: &vvs->rx_lock); |
583 | |
584 | err = skb_copy_datagram_iter(from: skb, |
585 | VIRTIO_VSOCK_SKB_CB(skb)->offset, |
586 | to: &msg->msg_iter, size: bytes); |
587 | if (err) |
588 | goto out; |
589 | |
590 | spin_lock_bh(lock: &vvs->rx_lock); |
591 | |
592 | total += bytes; |
593 | |
594 | VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; |
595 | |
596 | if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { |
597 | u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); |
598 | |
599 | virtio_transport_dec_rx_pkt(vvs, len: pkt_len); |
600 | __skb_unlink(skb, list: &vvs->rx_queue); |
601 | consume_skb(skb); |
602 | } |
603 | } |
604 | |
605 | fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; |
606 | free_space = vvs->buf_alloc - fwd_cnt_delta; |
607 | low_rx_bytes = (vvs->rx_bytes < |
608 | sock_rcvlowat(sk_vsock(vsk), waitall: 0, INT_MAX)); |
609 | |
610 | spin_unlock_bh(lock: &vvs->rx_lock); |
611 | |
612 | /* To reduce the number of credit update messages, |
613 | * don't update credits as long as lots of space is available. |
614 | * Note: the limit chosen here is arbitrary. Setting the limit |
615 | * too high causes extra messages. Too low causes transmitter |
616 | * stalls. As stalls are in theory more expensive than extra |
617 | * messages, we set the limit to a high value. TODO: experiment |
618 | * with different values. Also send credit update message when |
619 | * number of bytes in rx queue is not enough to wake up reader. |
620 | */ |
621 | if (fwd_cnt_delta && |
622 | (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) |
623 | virtio_transport_send_credit_update(vsk); |
624 | |
625 | return total; |
626 | |
627 | out: |
628 | if (total) |
629 | err = total; |
630 | return err; |
631 | } |
632 | |
633 | static ssize_t |
634 | virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, |
635 | struct msghdr *msg) |
636 | { |
637 | struct virtio_vsock_sock *vvs = vsk->trans; |
638 | struct sk_buff *skb; |
639 | size_t total, len; |
640 | |
641 | spin_lock_bh(lock: &vvs->rx_lock); |
642 | |
643 | if (!vvs->msg_count) { |
644 | spin_unlock_bh(lock: &vvs->rx_lock); |
645 | return 0; |
646 | } |
647 | |
648 | total = 0; |
649 | len = msg_data_left(msg); |
650 | |
651 | skb_queue_walk(&vvs->rx_queue, skb) { |
652 | struct virtio_vsock_hdr *hdr; |
653 | |
654 | if (total < len) { |
655 | size_t bytes; |
656 | int err; |
657 | |
658 | bytes = len - total; |
659 | if (bytes > skb->len) |
660 | bytes = skb->len; |
661 | |
662 | spin_unlock_bh(lock: &vvs->rx_lock); |
663 | |
664 | /* sk_lock is held by caller so no one else can dequeue. |
665 | * Unlock rx_lock since skb_copy_datagram_iter() may sleep. |
666 | */ |
667 | err = skb_copy_datagram_iter(from: skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, |
668 | to: &msg->msg_iter, size: bytes); |
669 | if (err) |
670 | return err; |
671 | |
672 | spin_lock_bh(lock: &vvs->rx_lock); |
673 | } |
674 | |
675 | total += skb->len; |
676 | hdr = virtio_vsock_hdr(skb); |
677 | |
678 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { |
679 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) |
680 | msg->msg_flags |= MSG_EOR; |
681 | |
682 | break; |
683 | } |
684 | } |
685 | |
686 | spin_unlock_bh(lock: &vvs->rx_lock); |
687 | |
688 | return total; |
689 | } |
690 | |
691 | static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, |
692 | struct msghdr *msg, |
693 | int flags) |
694 | { |
695 | struct virtio_vsock_sock *vvs = vsk->trans; |
696 | int dequeued_len = 0; |
697 | size_t user_buf_len = msg_data_left(msg); |
698 | bool msg_ready = false; |
699 | struct sk_buff *skb; |
700 | |
701 | spin_lock_bh(lock: &vvs->rx_lock); |
702 | |
703 | if (vvs->msg_count == 0) { |
704 | spin_unlock_bh(lock: &vvs->rx_lock); |
705 | return 0; |
706 | } |
707 | |
708 | while (!msg_ready) { |
709 | struct virtio_vsock_hdr *hdr; |
710 | size_t pkt_len; |
711 | |
712 | skb = __skb_dequeue(list: &vvs->rx_queue); |
713 | if (!skb) |
714 | break; |
715 | hdr = virtio_vsock_hdr(skb); |
716 | pkt_len = (size_t)le32_to_cpu(hdr->len); |
717 | |
718 | if (dequeued_len >= 0) { |
719 | size_t bytes_to_copy; |
720 | |
721 | bytes_to_copy = min(user_buf_len, pkt_len); |
722 | |
723 | if (bytes_to_copy) { |
724 | int err; |
725 | |
726 | /* sk_lock is held by caller so no one else can dequeue. |
727 | * Unlock rx_lock since skb_copy_datagram_iter() may sleep. |
728 | */ |
729 | spin_unlock_bh(lock: &vvs->rx_lock); |
730 | |
731 | err = skb_copy_datagram_iter(from: skb, offset: 0, |
732 | to: &msg->msg_iter, |
733 | size: bytes_to_copy); |
734 | if (err) { |
735 | /* Copy of message failed. Rest of |
736 | * fragments will be freed without copy. |
737 | */ |
738 | dequeued_len = err; |
739 | } else { |
740 | user_buf_len -= bytes_to_copy; |
741 | } |
742 | |
743 | spin_lock_bh(lock: &vvs->rx_lock); |
744 | } |
745 | |
746 | if (dequeued_len >= 0) |
747 | dequeued_len += pkt_len; |
748 | } |
749 | |
750 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { |
751 | msg_ready = true; |
752 | vvs->msg_count--; |
753 | |
754 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) |
755 | msg->msg_flags |= MSG_EOR; |
756 | } |
757 | |
758 | virtio_transport_dec_rx_pkt(vvs, len: pkt_len); |
759 | kfree_skb(skb); |
760 | } |
761 | |
762 | spin_unlock_bh(lock: &vvs->rx_lock); |
763 | |
764 | virtio_transport_send_credit_update(vsk); |
765 | |
766 | return dequeued_len; |
767 | } |
768 | |
769 | ssize_t |
770 | virtio_transport_stream_dequeue(struct vsock_sock *vsk, |
771 | struct msghdr *msg, |
772 | size_t len, int flags) |
773 | { |
774 | if (flags & MSG_PEEK) |
775 | return virtio_transport_stream_do_peek(vsk, msg, len); |
776 | else |
777 | return virtio_transport_stream_do_dequeue(vsk, msg, len); |
778 | } |
779 | EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); |
780 | |
781 | ssize_t |
782 | virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, |
783 | struct msghdr *msg, |
784 | int flags) |
785 | { |
786 | if (flags & MSG_PEEK) |
787 | return virtio_transport_seqpacket_do_peek(vsk, msg); |
788 | else |
789 | return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags); |
790 | } |
791 | EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); |
792 | |
793 | int |
794 | virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, |
795 | struct msghdr *msg, |
796 | size_t len) |
797 | { |
798 | struct virtio_vsock_sock *vvs = vsk->trans; |
799 | |
800 | spin_lock_bh(lock: &vvs->tx_lock); |
801 | |
802 | if (len > vvs->peer_buf_alloc) { |
803 | spin_unlock_bh(lock: &vvs->tx_lock); |
804 | return -EMSGSIZE; |
805 | } |
806 | |
807 | spin_unlock_bh(lock: &vvs->tx_lock); |
808 | |
809 | return virtio_transport_stream_enqueue(vsk, msg, len); |
810 | } |
811 | EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue); |
812 | |
813 | int |
814 | virtio_transport_dgram_dequeue(struct vsock_sock *vsk, |
815 | struct msghdr *msg, |
816 | size_t len, int flags) |
817 | { |
818 | return -EOPNOTSUPP; |
819 | } |
820 | EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); |
821 | |
822 | s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) |
823 | { |
824 | struct virtio_vsock_sock *vvs = vsk->trans; |
825 | s64 bytes; |
826 | |
827 | spin_lock_bh(lock: &vvs->rx_lock); |
828 | bytes = vvs->rx_bytes; |
829 | spin_unlock_bh(lock: &vvs->rx_lock); |
830 | |
831 | return bytes; |
832 | } |
833 | EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); |
834 | |
835 | u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk) |
836 | { |
837 | struct virtio_vsock_sock *vvs = vsk->trans; |
838 | u32 msg_count; |
839 | |
840 | spin_lock_bh(lock: &vvs->rx_lock); |
841 | msg_count = vvs->msg_count; |
842 | spin_unlock_bh(lock: &vvs->rx_lock); |
843 | |
844 | return msg_count; |
845 | } |
846 | EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data); |
847 | |
848 | static s64 virtio_transport_has_space(struct vsock_sock *vsk) |
849 | { |
850 | struct virtio_vsock_sock *vvs = vsk->trans; |
851 | s64 bytes; |
852 | |
853 | bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); |
854 | if (bytes < 0) |
855 | bytes = 0; |
856 | |
857 | return bytes; |
858 | } |
859 | |
860 | s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) |
861 | { |
862 | struct virtio_vsock_sock *vvs = vsk->trans; |
863 | s64 bytes; |
864 | |
865 | spin_lock_bh(lock: &vvs->tx_lock); |
866 | bytes = virtio_transport_has_space(vsk); |
867 | spin_unlock_bh(lock: &vvs->tx_lock); |
868 | |
869 | return bytes; |
870 | } |
871 | EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); |
872 | |
873 | int virtio_transport_do_socket_init(struct vsock_sock *vsk, |
874 | struct vsock_sock *psk) |
875 | { |
876 | struct virtio_vsock_sock *vvs; |
877 | |
878 | vvs = kzalloc(size: sizeof(*vvs), GFP_KERNEL); |
879 | if (!vvs) |
880 | return -ENOMEM; |
881 | |
882 | vsk->trans = vvs; |
883 | vvs->vsk = vsk; |
884 | if (psk && psk->trans) { |
885 | struct virtio_vsock_sock *ptrans = psk->trans; |
886 | |
887 | vvs->peer_buf_alloc = ptrans->peer_buf_alloc; |
888 | } |
889 | |
890 | if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE) |
891 | vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE; |
892 | |
893 | vvs->buf_alloc = vsk->buffer_size; |
894 | |
895 | spin_lock_init(&vvs->rx_lock); |
896 | spin_lock_init(&vvs->tx_lock); |
897 | skb_queue_head_init(list: &vvs->rx_queue); |
898 | |
899 | return 0; |
900 | } |
901 | EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); |
902 | |
903 | /* sk_lock held by the caller */ |
904 | void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val) |
905 | { |
906 | struct virtio_vsock_sock *vvs = vsk->trans; |
907 | |
908 | if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE) |
909 | *val = VIRTIO_VSOCK_MAX_BUF_SIZE; |
910 | |
911 | vvs->buf_alloc = *val; |
912 | |
913 | virtio_transport_send_credit_update(vsk); |
914 | } |
915 | EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size); |
916 | |
917 | int |
918 | virtio_transport_notify_poll_in(struct vsock_sock *vsk, |
919 | size_t target, |
920 | bool *data_ready_now) |
921 | { |
922 | *data_ready_now = vsock_stream_has_data(vsk) >= target; |
923 | |
924 | return 0; |
925 | } |
926 | EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); |
927 | |
928 | int |
929 | virtio_transport_notify_poll_out(struct vsock_sock *vsk, |
930 | size_t target, |
931 | bool *space_avail_now) |
932 | { |
933 | s64 free_space; |
934 | |
935 | free_space = vsock_stream_has_space(vsk); |
936 | if (free_space > 0) |
937 | *space_avail_now = true; |
938 | else if (free_space == 0) |
939 | *space_avail_now = false; |
940 | |
941 | return 0; |
942 | } |
943 | EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); |
944 | |
945 | int virtio_transport_notify_recv_init(struct vsock_sock *vsk, |
946 | size_t target, struct vsock_transport_recv_notify_data *data) |
947 | { |
948 | return 0; |
949 | } |
950 | EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); |
951 | |
952 | int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, |
953 | size_t target, struct vsock_transport_recv_notify_data *data) |
954 | { |
955 | return 0; |
956 | } |
957 | EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); |
958 | |
959 | int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, |
960 | size_t target, struct vsock_transport_recv_notify_data *data) |
961 | { |
962 | return 0; |
963 | } |
964 | EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); |
965 | |
966 | int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, |
967 | size_t target, ssize_t copied, bool data_read, |
968 | struct vsock_transport_recv_notify_data *data) |
969 | { |
970 | return 0; |
971 | } |
972 | EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); |
973 | |
/* Send notification hook: nothing to do, always returns 0. */
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);
980 | |
/* Send pre-block notification hook: nothing to do, always returns 0. */
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);
987 | |
/* Send pre-enqueue notification hook: nothing to do, always returns 0. */
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);
994 | |
995 | int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, |
996 | ssize_t written, struct vsock_transport_send_notify_data *data) |
997 | { |
998 | return 0; |
999 | } |
1000 | EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); |
1001 | |
1002 | u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) |
1003 | { |
1004 | return vsk->buffer_size; |
1005 | } |
1006 | EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); |
1007 | |
1008 | bool virtio_transport_stream_is_active(struct vsock_sock *vsk) |
1009 | { |
1010 | return true; |
1011 | } |
1012 | EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); |
1013 | |
1014 | bool virtio_transport_stream_allow(u32 cid, u32 port) |
1015 | { |
1016 | return true; |
1017 | } |
1018 | EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); |
1019 | |
1020 | int virtio_transport_dgram_bind(struct vsock_sock *vsk, |
1021 | struct sockaddr_vm *addr) |
1022 | { |
1023 | return -EOPNOTSUPP; |
1024 | } |
1025 | EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); |
1026 | |
1027 | bool virtio_transport_dgram_allow(u32 cid, u32 port) |
1028 | { |
1029 | return false; |
1030 | } |
1031 | EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); |
1032 | |
1033 | int virtio_transport_connect(struct vsock_sock *vsk) |
1034 | { |
1035 | struct virtio_vsock_pkt_info info = { |
1036 | .op = VIRTIO_VSOCK_OP_REQUEST, |
1037 | .vsk = vsk, |
1038 | }; |
1039 | |
1040 | return virtio_transport_send_pkt_info(vsk, info: &info); |
1041 | } |
1042 | EXPORT_SYMBOL_GPL(virtio_transport_connect); |
1043 | |
1044 | int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) |
1045 | { |
1046 | struct virtio_vsock_pkt_info info = { |
1047 | .op = VIRTIO_VSOCK_OP_SHUTDOWN, |
1048 | .flags = (mode & RCV_SHUTDOWN ? |
1049 | VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | |
1050 | (mode & SEND_SHUTDOWN ? |
1051 | VIRTIO_VSOCK_SHUTDOWN_SEND : 0), |
1052 | .vsk = vsk, |
1053 | }; |
1054 | |
1055 | return virtio_transport_send_pkt_info(vsk, info: &info); |
1056 | } |
1057 | EXPORT_SYMBOL_GPL(virtio_transport_shutdown); |
1058 | |
1059 | int |
1060 | virtio_transport_dgram_enqueue(struct vsock_sock *vsk, |
1061 | struct sockaddr_vm *remote_addr, |
1062 | struct msghdr *msg, |
1063 | size_t dgram_len) |
1064 | { |
1065 | return -EOPNOTSUPP; |
1066 | } |
1067 | EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); |
1068 | |
1069 | ssize_t |
1070 | virtio_transport_stream_enqueue(struct vsock_sock *vsk, |
1071 | struct msghdr *msg, |
1072 | size_t len) |
1073 | { |
1074 | struct virtio_vsock_pkt_info info = { |
1075 | .op = VIRTIO_VSOCK_OP_RW, |
1076 | .msg = msg, |
1077 | .pkt_len = len, |
1078 | .vsk = vsk, |
1079 | }; |
1080 | |
1081 | return virtio_transport_send_pkt_info(vsk, info: &info); |
1082 | } |
1083 | EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); |
1084 | |
1085 | void virtio_transport_destruct(struct vsock_sock *vsk) |
1086 | { |
1087 | struct virtio_vsock_sock *vvs = vsk->trans; |
1088 | |
1089 | kfree(objp: vvs); |
1090 | } |
1091 | EXPORT_SYMBOL_GPL(virtio_transport_destruct); |
1092 | |
1093 | static int virtio_transport_reset(struct vsock_sock *vsk, |
1094 | struct sk_buff *skb) |
1095 | { |
1096 | struct virtio_vsock_pkt_info info = { |
1097 | .op = VIRTIO_VSOCK_OP_RST, |
1098 | .reply = !!skb, |
1099 | .vsk = vsk, |
1100 | }; |
1101 | |
1102 | /* Send RST only if the original pkt is not a RST pkt */ |
1103 | if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST) |
1104 | return 0; |
1105 | |
1106 | return virtio_transport_send_pkt_info(vsk, info: &info); |
1107 | } |
1108 | |
1109 | /* Normally packets are associated with a socket. There may be no socket if an |
1110 | * attempt was made to connect to a socket that does not exist. |
1111 | */ |
1112 | static int virtio_transport_reset_no_sock(const struct virtio_transport *t, |
1113 | struct sk_buff *skb) |
1114 | { |
1115 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1116 | struct virtio_vsock_pkt_info info = { |
1117 | .op = VIRTIO_VSOCK_OP_RST, |
1118 | .type = le16_to_cpu(hdr->type), |
1119 | .reply = true, |
1120 | }; |
1121 | struct sk_buff *reply; |
1122 | |
1123 | /* Send RST only if the original pkt is not a RST pkt */ |
1124 | if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) |
1125 | return 0; |
1126 | |
1127 | if (!t) |
1128 | return -ENOTCONN; |
1129 | |
1130 | reply = virtio_transport_alloc_skb(info: &info, payload_len: 0, zcopy: false, |
1131 | le64_to_cpu(hdr->dst_cid), |
1132 | le32_to_cpu(hdr->dst_port), |
1133 | le64_to_cpu(hdr->src_cid), |
1134 | le32_to_cpu(hdr->src_port)); |
1135 | if (!reply) |
1136 | return -ENOMEM; |
1137 | |
1138 | return t->send_pkt(reply); |
1139 | } |
1140 | |
1141 | /* This function should be called with sk_lock held and SOCK_DONE set */ |
1142 | static void virtio_transport_remove_sock(struct vsock_sock *vsk) |
1143 | { |
1144 | struct virtio_vsock_sock *vvs = vsk->trans; |
1145 | |
1146 | /* We don't need to take rx_lock, as the socket is closing and we are |
1147 | * removing it. |
1148 | */ |
1149 | __skb_queue_purge(list: &vvs->rx_queue); |
1150 | vsock_remove_sock(vsk); |
1151 | } |
1152 | |
1153 | static void virtio_transport_wait_close(struct sock *sk, long timeout) |
1154 | { |
1155 | if (timeout) { |
1156 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
1157 | |
1158 | add_wait_queue(wq_head: sk_sleep(sk), wq_entry: &wait); |
1159 | |
1160 | do { |
1161 | if (sk_wait_event(sk, &timeout, |
1162 | sock_flag(sk, SOCK_DONE), &wait)) |
1163 | break; |
1164 | } while (!signal_pending(current) && timeout); |
1165 | |
1166 | remove_wait_queue(wq_head: sk_sleep(sk), wq_entry: &wait); |
1167 | } |
1168 | } |
1169 | |
1170 | static void virtio_transport_do_close(struct vsock_sock *vsk, |
1171 | bool cancel_timeout) |
1172 | { |
1173 | struct sock *sk = sk_vsock(vsk); |
1174 | |
1175 | sock_set_flag(sk, flag: SOCK_DONE); |
1176 | vsk->peer_shutdown = SHUTDOWN_MASK; |
1177 | if (vsock_stream_has_data(vsk) <= 0) |
1178 | sk->sk_state = TCP_CLOSING; |
1179 | sk->sk_state_change(sk); |
1180 | |
1181 | if (vsk->close_work_scheduled && |
1182 | (!cancel_timeout || cancel_delayed_work(dwork: &vsk->close_work))) { |
1183 | vsk->close_work_scheduled = false; |
1184 | |
1185 | virtio_transport_remove_sock(vsk); |
1186 | |
1187 | /* Release refcnt obtained when we scheduled the timeout */ |
1188 | sock_put(sk); |
1189 | } |
1190 | } |
1191 | |
1192 | static void virtio_transport_close_timeout(struct work_struct *work) |
1193 | { |
1194 | struct vsock_sock *vsk = |
1195 | container_of(work, struct vsock_sock, close_work.work); |
1196 | struct sock *sk = sk_vsock(vsk); |
1197 | |
1198 | sock_hold(sk); |
1199 | lock_sock(sk); |
1200 | |
1201 | if (!sock_flag(sk, flag: SOCK_DONE)) { |
1202 | (void)virtio_transport_reset(vsk, NULL); |
1203 | |
1204 | virtio_transport_do_close(vsk, cancel_timeout: false); |
1205 | } |
1206 | |
1207 | vsk->close_work_scheduled = false; |
1208 | |
1209 | release_sock(sk); |
1210 | sock_put(sk); |
1211 | } |
1212 | |
1213 | /* User context, vsk->sk is locked */ |
1214 | static bool virtio_transport_close(struct vsock_sock *vsk) |
1215 | { |
1216 | struct sock *sk = &vsk->sk; |
1217 | |
1218 | if (!(sk->sk_state == TCP_ESTABLISHED || |
1219 | sk->sk_state == TCP_CLOSING)) |
1220 | return true; |
1221 | |
1222 | /* Already received SHUTDOWN from peer, reply with RST */ |
1223 | if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { |
1224 | (void)virtio_transport_reset(vsk, NULL); |
1225 | return true; |
1226 | } |
1227 | |
1228 | if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) |
1229 | (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); |
1230 | |
1231 | if (sock_flag(sk, flag: SOCK_LINGER) && !(current->flags & PF_EXITING)) |
1232 | virtio_transport_wait_close(sk, timeout: sk->sk_lingertime); |
1233 | |
1234 | if (sock_flag(sk, flag: SOCK_DONE)) { |
1235 | return true; |
1236 | } |
1237 | |
1238 | sock_hold(sk); |
1239 | INIT_DELAYED_WORK(&vsk->close_work, |
1240 | virtio_transport_close_timeout); |
1241 | vsk->close_work_scheduled = true; |
1242 | schedule_delayed_work(dwork: &vsk->close_work, VSOCK_CLOSE_TIMEOUT); |
1243 | return false; |
1244 | } |
1245 | |
1246 | void virtio_transport_release(struct vsock_sock *vsk) |
1247 | { |
1248 | struct sock *sk = &vsk->sk; |
1249 | bool remove_sock = true; |
1250 | |
1251 | if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) |
1252 | remove_sock = virtio_transport_close(vsk); |
1253 | |
1254 | if (remove_sock) { |
1255 | sock_set_flag(sk, flag: SOCK_DONE); |
1256 | virtio_transport_remove_sock(vsk); |
1257 | } |
1258 | } |
1259 | EXPORT_SYMBOL_GPL(virtio_transport_release); |
1260 | |
1261 | static int |
1262 | virtio_transport_recv_connecting(struct sock *sk, |
1263 | struct sk_buff *skb) |
1264 | { |
1265 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1266 | struct vsock_sock *vsk = vsock_sk(sk); |
1267 | int skerr; |
1268 | int err; |
1269 | |
1270 | switch (le16_to_cpu(hdr->op)) { |
1271 | case VIRTIO_VSOCK_OP_RESPONSE: |
1272 | sk->sk_state = TCP_ESTABLISHED; |
1273 | sk->sk_socket->state = SS_CONNECTED; |
1274 | vsock_insert_connected(vsk); |
1275 | sk->sk_state_change(sk); |
1276 | break; |
1277 | case VIRTIO_VSOCK_OP_INVALID: |
1278 | break; |
1279 | case VIRTIO_VSOCK_OP_RST: |
1280 | skerr = ECONNRESET; |
1281 | err = 0; |
1282 | goto destroy; |
1283 | default: |
1284 | skerr = EPROTO; |
1285 | err = -EINVAL; |
1286 | goto destroy; |
1287 | } |
1288 | return 0; |
1289 | |
1290 | destroy: |
1291 | virtio_transport_reset(vsk, skb); |
1292 | sk->sk_state = TCP_CLOSE; |
1293 | sk->sk_err = skerr; |
1294 | sk_error_report(sk); |
1295 | return err; |
1296 | } |
1297 | |
1298 | static void |
1299 | virtio_transport_recv_enqueue(struct vsock_sock *vsk, |
1300 | struct sk_buff *skb) |
1301 | { |
1302 | struct virtio_vsock_sock *vvs = vsk->trans; |
1303 | bool can_enqueue, free_pkt = false; |
1304 | struct virtio_vsock_hdr *hdr; |
1305 | u32 len; |
1306 | |
1307 | hdr = virtio_vsock_hdr(skb); |
1308 | len = le32_to_cpu(hdr->len); |
1309 | |
1310 | spin_lock_bh(lock: &vvs->rx_lock); |
1311 | |
1312 | can_enqueue = virtio_transport_inc_rx_pkt(vvs, len); |
1313 | if (!can_enqueue) { |
1314 | free_pkt = true; |
1315 | goto out; |
1316 | } |
1317 | |
1318 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) |
1319 | vvs->msg_count++; |
1320 | |
1321 | /* Try to copy small packets into the buffer of last packet queued, |
1322 | * to avoid wasting memory queueing the entire buffer with a small |
1323 | * payload. |
1324 | */ |
1325 | if (len <= GOOD_COPY_LEN && !skb_queue_empty(list: &vvs->rx_queue)) { |
1326 | struct virtio_vsock_hdr *last_hdr; |
1327 | struct sk_buff *last_skb; |
1328 | |
1329 | last_skb = skb_peek_tail(list_: &vvs->rx_queue); |
1330 | last_hdr = virtio_vsock_hdr(skb: last_skb); |
1331 | |
1332 | /* If there is space in the last packet queued, we copy the |
1333 | * new packet in its buffer. We avoid this if the last packet |
1334 | * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is |
1335 | * delimiter of SEQPACKET message, so 'pkt' is the first packet |
1336 | * of a new message. |
1337 | */ |
1338 | if (skb->len < skb_tailroom(skb: last_skb) && |
1339 | !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) { |
1340 | memcpy(skb_put(last_skb, skb->len), skb->data, skb->len); |
1341 | free_pkt = true; |
1342 | last_hdr->flags |= hdr->flags; |
1343 | le32_add_cpu(var: &last_hdr->len, val: len); |
1344 | goto out; |
1345 | } |
1346 | } |
1347 | |
1348 | __skb_queue_tail(list: &vvs->rx_queue, newsk: skb); |
1349 | |
1350 | out: |
1351 | spin_unlock_bh(lock: &vvs->rx_lock); |
1352 | if (free_pkt) |
1353 | kfree_skb(skb); |
1354 | } |
1355 | |
1356 | static int |
1357 | virtio_transport_recv_connected(struct sock *sk, |
1358 | struct sk_buff *skb) |
1359 | { |
1360 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1361 | struct vsock_sock *vsk = vsock_sk(sk); |
1362 | int err = 0; |
1363 | |
1364 | switch (le16_to_cpu(hdr->op)) { |
1365 | case VIRTIO_VSOCK_OP_RW: |
1366 | virtio_transport_recv_enqueue(vsk, skb); |
1367 | vsock_data_ready(sk); |
1368 | return err; |
1369 | case VIRTIO_VSOCK_OP_CREDIT_REQUEST: |
1370 | virtio_transport_send_credit_update(vsk); |
1371 | break; |
1372 | case VIRTIO_VSOCK_OP_CREDIT_UPDATE: |
1373 | sk->sk_write_space(sk); |
1374 | break; |
1375 | case VIRTIO_VSOCK_OP_SHUTDOWN: |
1376 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) |
1377 | vsk->peer_shutdown |= RCV_SHUTDOWN; |
1378 | if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) |
1379 | vsk->peer_shutdown |= SEND_SHUTDOWN; |
1380 | if (vsk->peer_shutdown == SHUTDOWN_MASK) { |
1381 | if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, flag: SOCK_DONE)) { |
1382 | (void)virtio_transport_reset(vsk, NULL); |
1383 | virtio_transport_do_close(vsk, cancel_timeout: true); |
1384 | } |
1385 | /* Remove this socket anyway because the remote peer sent |
1386 | * the shutdown. This way a new connection will succeed |
1387 | * if the remote peer uses the same source port, |
1388 | * even if the old socket is still unreleased, but now disconnected. |
1389 | */ |
1390 | vsock_remove_sock(vsk); |
1391 | } |
1392 | if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) |
1393 | sk->sk_state_change(sk); |
1394 | break; |
1395 | case VIRTIO_VSOCK_OP_RST: |
1396 | virtio_transport_do_close(vsk, cancel_timeout: true); |
1397 | break; |
1398 | default: |
1399 | err = -EINVAL; |
1400 | break; |
1401 | } |
1402 | |
1403 | kfree_skb(skb); |
1404 | return err; |
1405 | } |
1406 | |
1407 | static void |
1408 | virtio_transport_recv_disconnecting(struct sock *sk, |
1409 | struct sk_buff *skb) |
1410 | { |
1411 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1412 | struct vsock_sock *vsk = vsock_sk(sk); |
1413 | |
1414 | if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) |
1415 | virtio_transport_do_close(vsk, cancel_timeout: true); |
1416 | } |
1417 | |
1418 | static int |
1419 | virtio_transport_send_response(struct vsock_sock *vsk, |
1420 | struct sk_buff *skb) |
1421 | { |
1422 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1423 | struct virtio_vsock_pkt_info info = { |
1424 | .op = VIRTIO_VSOCK_OP_RESPONSE, |
1425 | .remote_cid = le64_to_cpu(hdr->src_cid), |
1426 | .remote_port = le32_to_cpu(hdr->src_port), |
1427 | .reply = true, |
1428 | .vsk = vsk, |
1429 | }; |
1430 | |
1431 | return virtio_transport_send_pkt_info(vsk, info: &info); |
1432 | } |
1433 | |
1434 | static bool virtio_transport_space_update(struct sock *sk, |
1435 | struct sk_buff *skb) |
1436 | { |
1437 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1438 | struct vsock_sock *vsk = vsock_sk(sk); |
1439 | struct virtio_vsock_sock *vvs = vsk->trans; |
1440 | bool space_available; |
1441 | |
1442 | /* Listener sockets are not associated with any transport, so we are |
1443 | * not able to take the state to see if there is space available in the |
1444 | * remote peer, but since they are only used to receive requests, we |
1445 | * can assume that there is always space available in the other peer. |
1446 | */ |
1447 | if (!vvs) |
1448 | return true; |
1449 | |
1450 | /* buf_alloc and fwd_cnt is always included in the hdr */ |
1451 | spin_lock_bh(lock: &vvs->tx_lock); |
1452 | vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc); |
1453 | vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt); |
1454 | space_available = virtio_transport_has_space(vsk); |
1455 | spin_unlock_bh(lock: &vvs->tx_lock); |
1456 | return space_available; |
1457 | } |
1458 | |
1459 | /* Handle server socket */ |
1460 | static int |
1461 | virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, |
1462 | struct virtio_transport *t) |
1463 | { |
1464 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1465 | struct vsock_sock *vsk = vsock_sk(sk); |
1466 | struct vsock_sock *vchild; |
1467 | struct sock *child; |
1468 | int ret; |
1469 | |
1470 | if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) { |
1471 | virtio_transport_reset_no_sock(t, skb); |
1472 | return -EINVAL; |
1473 | } |
1474 | |
1475 | if (sk_acceptq_is_full(sk)) { |
1476 | virtio_transport_reset_no_sock(t, skb); |
1477 | return -ENOMEM; |
1478 | } |
1479 | |
1480 | child = vsock_create_connected(parent: sk); |
1481 | if (!child) { |
1482 | virtio_transport_reset_no_sock(t, skb); |
1483 | return -ENOMEM; |
1484 | } |
1485 | |
1486 | sk_acceptq_added(sk); |
1487 | |
1488 | lock_sock_nested(sk: child, SINGLE_DEPTH_NESTING); |
1489 | |
1490 | child->sk_state = TCP_ESTABLISHED; |
1491 | |
1492 | vchild = vsock_sk(child); |
1493 | vsock_addr_init(addr: &vchild->local_addr, le64_to_cpu(hdr->dst_cid), |
1494 | le32_to_cpu(hdr->dst_port)); |
1495 | vsock_addr_init(addr: &vchild->remote_addr, le64_to_cpu(hdr->src_cid), |
1496 | le32_to_cpu(hdr->src_port)); |
1497 | |
1498 | ret = vsock_assign_transport(vsk: vchild, psk: vsk); |
1499 | /* Transport assigned (looking at remote_addr) must be the same |
1500 | * where we received the request. |
1501 | */ |
1502 | if (ret || vchild->transport != &t->transport) { |
1503 | release_sock(sk: child); |
1504 | virtio_transport_reset_no_sock(t, skb); |
1505 | sock_put(sk: child); |
1506 | return ret; |
1507 | } |
1508 | |
1509 | if (virtio_transport_space_update(sk: child, skb)) |
1510 | child->sk_write_space(child); |
1511 | |
1512 | vsock_insert_connected(vsk: vchild); |
1513 | vsock_enqueue_accept(listener: sk, connected: child); |
1514 | virtio_transport_send_response(vsk: vchild, skb); |
1515 | |
1516 | release_sock(sk: child); |
1517 | |
1518 | sk->sk_data_ready(sk); |
1519 | return 0; |
1520 | } |
1521 | |
1522 | static bool virtio_transport_valid_type(u16 type) |
1523 | { |
1524 | return (type == VIRTIO_VSOCK_TYPE_STREAM) || |
1525 | (type == VIRTIO_VSOCK_TYPE_SEQPACKET); |
1526 | } |
1527 | |
1528 | /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex |
1529 | * lock. |
1530 | */ |
1531 | void virtio_transport_recv_pkt(struct virtio_transport *t, |
1532 | struct sk_buff *skb) |
1533 | { |
1534 | struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); |
1535 | struct sockaddr_vm src, dst; |
1536 | struct vsock_sock *vsk; |
1537 | struct sock *sk; |
1538 | bool space_available; |
1539 | |
1540 | vsock_addr_init(addr: &src, le64_to_cpu(hdr->src_cid), |
1541 | le32_to_cpu(hdr->src_port)); |
1542 | vsock_addr_init(addr: &dst, le64_to_cpu(hdr->dst_cid), |
1543 | le32_to_cpu(hdr->dst_port)); |
1544 | |
1545 | trace_virtio_transport_recv_pkt(src_cid: src.svm_cid, src_port: src.svm_port, |
1546 | dst_cid: dst.svm_cid, dst_port: dst.svm_port, |
1547 | le32_to_cpu(hdr->len), |
1548 | le16_to_cpu(hdr->type), |
1549 | le16_to_cpu(hdr->op), |
1550 | le32_to_cpu(hdr->flags), |
1551 | le32_to_cpu(hdr->buf_alloc), |
1552 | le32_to_cpu(hdr->fwd_cnt)); |
1553 | |
1554 | if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) { |
1555 | (void)virtio_transport_reset_no_sock(t, skb); |
1556 | goto free_pkt; |
1557 | } |
1558 | |
1559 | /* The socket must be in connected or bound table |
1560 | * otherwise send reset back |
1561 | */ |
1562 | sk = vsock_find_connected_socket(src: &src, dst: &dst); |
1563 | if (!sk) { |
1564 | sk = vsock_find_bound_socket(addr: &dst); |
1565 | if (!sk) { |
1566 | (void)virtio_transport_reset_no_sock(t, skb); |
1567 | goto free_pkt; |
1568 | } |
1569 | } |
1570 | |
1571 | if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) { |
1572 | (void)virtio_transport_reset_no_sock(t, skb); |
1573 | sock_put(sk); |
1574 | goto free_pkt; |
1575 | } |
1576 | |
1577 | if (!skb_set_owner_sk_safe(skb, sk)) { |
1578 | WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n" ); |
1579 | goto free_pkt; |
1580 | } |
1581 | |
1582 | vsk = vsock_sk(sk); |
1583 | |
1584 | lock_sock(sk); |
1585 | |
1586 | /* Check if sk has been closed before lock_sock */ |
1587 | if (sock_flag(sk, flag: SOCK_DONE)) { |
1588 | (void)virtio_transport_reset_no_sock(t, skb); |
1589 | release_sock(sk); |
1590 | sock_put(sk); |
1591 | goto free_pkt; |
1592 | } |
1593 | |
1594 | space_available = virtio_transport_space_update(sk, skb); |
1595 | |
1596 | /* Update CID in case it has changed after a transport reset event */ |
1597 | if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) |
1598 | vsk->local_addr.svm_cid = dst.svm_cid; |
1599 | |
1600 | if (space_available) |
1601 | sk->sk_write_space(sk); |
1602 | |
1603 | switch (sk->sk_state) { |
1604 | case TCP_LISTEN: |
1605 | virtio_transport_recv_listen(sk, skb, t); |
1606 | kfree_skb(skb); |
1607 | break; |
1608 | case TCP_SYN_SENT: |
1609 | virtio_transport_recv_connecting(sk, skb); |
1610 | kfree_skb(skb); |
1611 | break; |
1612 | case TCP_ESTABLISHED: |
1613 | virtio_transport_recv_connected(sk, skb); |
1614 | break; |
1615 | case TCP_CLOSING: |
1616 | virtio_transport_recv_disconnecting(sk, skb); |
1617 | kfree_skb(skb); |
1618 | break; |
1619 | default: |
1620 | (void)virtio_transport_reset_no_sock(t, skb); |
1621 | kfree_skb(skb); |
1622 | break; |
1623 | } |
1624 | |
1625 | release_sock(sk); |
1626 | |
1627 | /* Release refcnt obtained when we fetched this socket out of the |
1628 | * bound or connected list. |
1629 | */ |
1630 | sock_put(sk); |
1631 | return; |
1632 | |
1633 | free_pkt: |
1634 | kfree_skb(skb); |
1635 | } |
1636 | EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); |
1637 | |
1638 | /* Remove skbs found in a queue that have a vsk that matches. |
1639 | * |
1640 | * Each skb is freed. |
1641 | * |
1642 | * Returns the count of skbs that were reply packets. |
1643 | */ |
1644 | int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue) |
1645 | { |
1646 | struct sk_buff_head freeme; |
1647 | struct sk_buff *skb, *tmp; |
1648 | int cnt = 0; |
1649 | |
1650 | skb_queue_head_init(list: &freeme); |
1651 | |
1652 | spin_lock_bh(lock: &queue->lock); |
1653 | skb_queue_walk_safe(queue, skb, tmp) { |
1654 | if (vsock_sk(skb->sk) != vsk) |
1655 | continue; |
1656 | |
1657 | __skb_unlink(skb, list: queue); |
1658 | __skb_queue_tail(list: &freeme, newsk: skb); |
1659 | |
1660 | if (virtio_vsock_skb_reply(skb)) |
1661 | cnt++; |
1662 | } |
1663 | spin_unlock_bh(lock: &queue->lock); |
1664 | |
1665 | __skb_queue_purge(list: &freeme); |
1666 | |
1667 | return cnt; |
1668 | } |
1669 | EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs); |
1670 | |
1671 | int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor) |
1672 | { |
1673 | struct virtio_vsock_sock *vvs = vsk->trans; |
1674 | struct sock *sk = sk_vsock(vsk); |
1675 | struct sk_buff *skb; |
1676 | int off = 0; |
1677 | int err; |
1678 | |
1679 | spin_lock_bh(lock: &vvs->rx_lock); |
1680 | /* Use __skb_recv_datagram() for race-free handling of the receive. It |
1681 | * works for types other than dgrams. |
1682 | */ |
1683 | skb = __skb_recv_datagram(sk, sk_queue: &vvs->rx_queue, MSG_DONTWAIT, off: &off, err: &err); |
1684 | spin_unlock_bh(lock: &vvs->rx_lock); |
1685 | |
1686 | if (!skb) |
1687 | return err; |
1688 | |
1689 | return recv_actor(sk, skb); |
1690 | } |
1691 | EXPORT_SYMBOL_GPL(virtio_transport_read_skb); |
1692 | |
1693 | int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val) |
1694 | { |
1695 | struct virtio_vsock_sock *vvs = vsk->trans; |
1696 | bool send_update; |
1697 | |
1698 | spin_lock_bh(lock: &vvs->rx_lock); |
1699 | |
1700 | /* If number of available bytes is less than new SO_RCVLOWAT value, |
1701 | * kick sender to send more data, because sender may sleep in its |
1702 | * 'send()' syscall waiting for enough space at our side. Also |
1703 | * don't send credit update when peer already knows actual value - |
1704 | * such transmission will be useless. |
1705 | */ |
1706 | send_update = (vvs->rx_bytes < val) && |
1707 | (vvs->fwd_cnt != vvs->last_fwd_cnt); |
1708 | |
1709 | spin_unlock_bh(lock: &vvs->rx_lock); |
1710 | |
1711 | if (send_update) { |
1712 | int err; |
1713 | |
1714 | err = virtio_transport_send_credit_update(vsk); |
1715 | if (err < 0) |
1716 | return err; |
1717 | } |
1718 | |
1719 | return 0; |
1720 | } |
1721 | EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat); |
1722 | |
1723 | MODULE_LICENSE("GPL v2" ); |
1724 | MODULE_AUTHOR("Asias He" ); |
1725 | MODULE_DESCRIPTION("common code for virtio vsock" ); |
1726 | |