1 | // SPDX-License-Identifier: BSD-3-Clause |
2 | /* |
3 | * Copyright (c) 2009-2012,2016,2023 Microsoft Corp. |
4 | * Copyright (c) 2012 NetApp Inc. |
5 | * Copyright (c) 2012 Citrix Inc. |
6 | * All rights reserved. |
7 | */ |
8 | |
9 | #include <errno.h> |
10 | #include <fcntl.h> |
11 | #include <emmintrin.h> |
12 | #include <linux/limits.h> |
13 | #include <stdbool.h> |
14 | #include <stdint.h> |
15 | #include <stdio.h> |
16 | #include <string.h> |
17 | #include <sys/mman.h> |
18 | #include <sys/uio.h> |
19 | #include <unistd.h> |
20 | #include "vmbus_bufring.h" |
21 | |
22 | /** |
23 | * Compiler barrier. |
24 | * |
25 | * Guarantees that operation reordering does not occur at compile time |
26 | * for operations directly before and after the barrier. |
27 | */ |
28 | #define rte_compiler_barrier() ({ asm volatile ("" : : : "memory"); }) |
29 | |
30 | #define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF |
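/* Round val up to a multiple of align (align must be a power of two). */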
#define ALIGN(val, align) ((typeof(val))(((val) + ((align) - 1)) & (~((typeof(val))((align) - 1)))))
32 | |
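/*
 * Map the UIO resource backing the channel ring buffers.  The mapping is
 * 2 * size bytes, which presumably covers the TX and RX bufrings that the
 * UIO device exposes back to back.
 */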
33 | void *vmbus_uio_map(int *fd, int size) |
34 | { |
35 | void *map; |
36 | |
	map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
38 | if (map == MAP_FAILED) |
39 | return NULL; |
40 | |
41 | return map; |
42 | } |
43 | |
44 | /* Increase bufring index by inc with wraparound */ |
45 | static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz) |
46 | { |
47 | idx += inc; |
48 | if (idx >= sz) |
49 | idx -= sz; |
50 | |
51 | return idx; |
52 | } |
53 | |
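/*
 * Attach a bufring descriptor to a mapped ring buffer: snapshot the
 * current write index and record the usable data area size (the mapping
 * minus the vmbus_bufring header).
 */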
54 | void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen) |
55 | { |
56 | br->vbr = buf; |
57 | br->windex = br->vbr->windex; |
58 | br->dsize = blen - sizeof(struct vmbus_bufring); |
59 | } |
60 | |
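/*
 * Full memory barrier.  A lock-prefixed add of zero to a stack location
 * orders loads and stores like mfence but is typically cheaper; adding
 * zero leaves the target memory unchanged, so touching below %rsp is safe.
 */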
static inline void
rte_smp_mb(void)
63 | { |
64 | asm volatile("lock addl $0, -128(%%rsp); " ::: "memory" ); |
65 | } |
66 | |
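/*
 * Atomic compare-and-set: if *dst == exp, store src and return 1,
 * otherwise return 0.  Implemented with lock cmpxchg; sete captures
 * the ZF flag, which cmpxchg sets on success.
 */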
67 | static inline int |
68 | rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) |
69 | { |
70 | uint8_t res; |
71 | |
72 | asm volatile("lock ; " |
73 | "cmpxchgl %[src], %[dst];" |
74 | "sete %[res];" |
75 | : [res] "=a" (res), /* output */ |
76 | [dst] "=m" (*dst) |
77 | : [src] "r" (src), /* input */ |
78 | "a" (exp), |
79 | "m" (*dst) |
80 | : "memory" ); /* no-clobber list */ |
81 | return res; |
82 | } |
83 | |
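/*
 * Copy cplen bytes from src0 into the TX ring at windex, splitting the
 * copy in two when it would run past the end of the data area.  Returns
 * the advanced write index.
 */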
84 | static inline uint32_t |
85 | vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex, |
86 | const void *src0, uint32_t cplen) |
87 | { |
88 | uint8_t *br_data = tbr->vbr->data; |
89 | uint32_t br_dsize = tbr->dsize; |
90 | const uint8_t *src = src0; |
91 | |
92 | /* XXX use double mapping like Linux kernel? */ |
93 | if (cplen > br_dsize - windex) { |
94 | uint32_t fraglen = br_dsize - windex; |
95 | |
96 | /* Wrap-around detected */ |
		memcpy(br_data + windex, src, fraglen);
		memcpy(br_data, src + fraglen, cplen - fraglen);
	} else {
		memcpy(br_data + windex, src, cplen);
101 | } |
102 | |
	return vmbus_br_idxinc(windex, cplen, br_dsize);
104 | } |
105 | |
106 | /* |
107 | * Write scattered channel packet to TX bufring. |
108 | * |
109 | * The offset of this channel packet is written as a 64bits value |
110 | * immediately after this channel packet. |
111 | * |
112 | * The write goes through three stages: |
113 | * 1. Reserve space in ring buffer for the new data. |
114 | * Writer atomically moves priv_write_index. |
115 | * 2. Copy the new data into the ring. |
116 | * 3. Update the tail of the ring (visible to host) that indicates |
117 | * next read location. Writer updates write_index |
118 | */ |
119 | static int |
120 | vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen) |
121 | { |
122 | struct vmbus_bufring *vbr = tbr->vbr; |
123 | uint32_t ring_size = tbr->dsize; |
124 | uint32_t old_windex, next_windex, windex, total; |
125 | uint64_t save_windex; |
126 | int i; |
127 | |
128 | total = 0; |
129 | for (i = 0; i < iovlen; i++) |
130 | total += iov[i].iov_len; |
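	/* Account for the 64-bit packet offset stored after the data. */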
131 | total += sizeof(save_windex); |
132 | |
133 | /* Reserve space in ring */ |
134 | do { |
135 | uint32_t avail; |
136 | |
137 | /* Get current free location */ |
138 | old_windex = tbr->windex; |
139 | |
140 | /* Prevent compiler reordering this with calculation */ |
141 | rte_compiler_barrier(); |
142 | |
		avail = vmbus_br_availwrite(tbr, old_windex);
144 | |
		/*
		 * If there is not enough space in the ring (at least one
		 * byte must remain free), tell the caller.
		 */
146 | if (avail <= total) |
147 | return -EAGAIN; |
148 | |
		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);
150 | |
151 | /* Atomic update of next write_index for other threads */ |
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));
153 | |
154 | /* Space from old..new is now reserved */ |
155 | windex = old_windex; |
156 | for (i = 0; i < iovlen; i++) |
		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);
158 | |
159 | /* Set the offset of the current channel packet. */ |
160 | save_windex = ((uint64_t)old_windex) << 32; |
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));
163 | |
164 | /* The region reserved should match region used */ |
165 | if (windex != next_windex) |
166 | return -EINVAL; |
167 | |
168 | /* Ensure that data is available before updating host index */ |
169 | rte_compiler_barrier(); |
170 | |
	/* Check in our reservation; wait for our turn to update the host. */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
173 | _mm_pause(); |
174 | |
175 | return 0; |
176 | } |
177 | |
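/*
 * Send an inband channel packet: a vmbus_chanpkt header, the caller's
 * data, and zero padding up to the next 8-byte boundary, written to the
 * TX ring as one packet.  xactid is set to VMBUS_RQST_ERROR since no
 * completion is matched against this transfer (an assumption based on
 * how this transport is used).
 */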
178 | int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data, |
179 | uint32_t dlen, uint32_t flags) |
180 | { |
181 | struct vmbus_chanpkt pkt; |
182 | unsigned int pktlen, pad_pktlen; |
183 | const uint32_t hlen = sizeof(pkt); |
184 | uint64_t pad = 0; |
185 | struct iovec iov[3]; |
186 | int error; |
187 | |
188 | pktlen = hlen + dlen; |
189 | pad_pktlen = ALIGN(pktlen, sizeof(uint64_t)); |
190 | |
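	/* Header and total lengths are encoded in 8-byte units. */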
191 | pkt.hdr.type = type; |
192 | pkt.hdr.flags = flags; |
193 | pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT; |
194 | pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT; |
195 | pkt.hdr.xactid = VMBUS_RQST_ERROR; |
196 | |
197 | iov[0].iov_base = &pkt; |
198 | iov[0].iov_len = hlen; |
199 | iov[1].iov_base = data; |
200 | iov[1].iov_len = dlen; |
201 | iov[2].iov_base = &pad; |
202 | iov[2].iov_len = pad_pktlen - pktlen; |
203 | |
	error = vmbus_txbr_write(txbr, iov, 3);
205 | |
206 | return error; |
207 | } |
208 | |
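/*
 * Copy cplen bytes out of the RX ring starting at rindex, handling
 * wrap-around at the end of the data area.  Returns the advanced read
 * index.
 */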
209 | static inline uint32_t |
210 | vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex, |
211 | void *dst0, size_t cplen) |
212 | { |
213 | const uint8_t *br_data = rbr->vbr->data; |
214 | uint32_t br_dsize = rbr->dsize; |
215 | uint8_t *dst = dst0; |
216 | |
217 | if (cplen > br_dsize - rindex) { |
218 | uint32_t fraglen = br_dsize - rindex; |
219 | |
220 | /* Wrap-around detected. */ |
		memcpy(dst, br_data + rindex, fraglen);
		memcpy(dst + fraglen, br_data, cplen - fraglen);
	} else {
		memcpy(dst, br_data + rindex, cplen);
225 | } |
226 | |
	return vmbus_br_idxinc(rindex, cplen, br_dsize);
228 | } |
229 | |
230 | /* Copy data from receive ring but don't change index */ |
231 | static int |
232 | vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen) |
233 | { |
234 | uint32_t avail; |
235 | |
236 | /* |
237 | * The requested data and the 64bits channel packet |
238 | * offset should be there at least. |
239 | */ |
	avail = vmbus_br_availread(rbr);
241 | if (avail < dlen + sizeof(uint64_t)) |
242 | return -EAGAIN; |
243 | |
	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
245 | return 0; |
246 | } |
247 | |
248 | /* |
249 | * Copy data from receive ring and change index |
250 | * NOTE: |
251 | * We assume (dlen + skip) == sizeof(channel packet). |
252 | */ |
253 | static int |
254 | vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip) |
255 | { |
256 | struct vmbus_bufring *vbr = rbr->vbr; |
257 | uint32_t br_dsize = rbr->dsize; |
258 | uint32_t rindex; |
259 | |
	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
261 | return -EAGAIN; |
262 | |
263 | /* Record where host was when we started read (for debug) */ |
264 | rbr->windex = rbr->vbr->windex; |
265 | |
266 | /* |
267 | * Copy channel packet from RX bufring. |
268 | */ |
	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
271 | |
272 | /* |
273 | * Discard this channel packet's 64bits offset, which is useless to us. |
274 | */ |
	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);
276 | |
277 | /* Update the read index _after_ the channel packet is fetched. */ |
278 | rte_compiler_barrier(); |
279 | |
280 | vbr->rindex = rindex; |
281 | |
282 | return 0; |
283 | } |
284 | |
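/*
 * Receive one raw channel packet, header included, into the caller's
 * buffer.  On success returns the number of ring bytes consumed (the
 * padded packet plus its 64-bit offset); on -ENOBUFS, *len holds the
 * size the caller must provide.
 */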
285 | int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr, |
286 | void *data, uint32_t *len) |
287 | { |
288 | struct vmbus_chanpkt_hdr pkt; |
289 | uint32_t dlen, bufferlen = *len; |
290 | int error; |
291 | |
	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
293 | if (error) |
294 | return error; |
295 | |
	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
		/* XXX the channel is effectively dead. */
		return -EIO;
299 | |
300 | if (unlikely(pkt.hlen > pkt.tlen)) |
301 | return -EIO; |
302 | |
	/* Lengths are in quadwords. */
304 | dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT; |
305 | *len = dlen; |
306 | |
	/* Caller's buffer is not large enough; *len holds the required size. */
308 | if (unlikely(dlen > bufferlen)) |
309 | return -ENOBUFS; |
310 | |
	/* Read the whole packet, header included. */
	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
313 | if (error) |
314 | return error; |
315 | |
	/* Return the number of ring bytes consumed, offset trailer included. */
317 | return dlen + sizeof(uint64_t); |
318 | } |
319 | |