// SPDX-License-Identifier: BSD-3-Clause
/*
 * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 */

#include <errno.h>
#include <fcntl.h>
#include <emmintrin.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <unistd.h>
#include "vmbus_bufring.h"

/**
 * Compiler barrier.
 *
 * Guarantees that operation reordering does not occur at compile time
 * for operations directly before and after the barrier.
 */
#define rte_compiler_barrier() ({ asm volatile ("" : : : "memory"); })

#define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF
/* Round val up to the next multiple of align (align must be a power of 2) */
#define ALIGN(val, align) ((typeof(val))(((val) + ((align) - 1)) & (~((typeof(val))((align) - 1)))))

void *vmbus_uio_map(int *fd, int size)
{
	void *map;

	map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
	if (map == MAP_FAILED)
		return NULL;

	return map;
}
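
/*
 * Example (illustrative sketch, not part of this file): map a channel's
 * ring buffers and initialize bufring state. It assumes the UIO mapping
 * lays out the TX ring followed by the RX ring, each of 'size' bytes,
 * which is why 2 * size is mapped above.
 *
 *	struct vmbus_br txbr, rxbr;
 *	unsigned int ring_size = 0x8000;	// size is an assumption
 *	int uio_fd = open("/dev/uio0", O_RDWR);	// device path is an assumption
 *	void *ring = vmbus_uio_map(&uio_fd, ring_size);
 *
 *	if (ring) {
 *		vmbus_br_setup(&txbr, ring, ring_size);
 *		vmbus_br_setup(&rxbr, (uint8_t *)ring + ring_size, ring_size);
 *	}
 */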

/*
 * Increase bufring index by inc with wraparound. Callers guarantee
 * inc <= sz, so wrapping once is sufficient (e.g. idx 4090, inc 16,
 * sz 4096 yields 10).
 */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	idx += inc;
	if (idx >= sz)
		idx -= sz;

	return idx;
}

void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
{
	br->vbr = buf;
	br->windex = br->vbr->windex;
	br->dsize = blen - sizeof(struct vmbus_bufring);
}

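/*
 * Full memory barrier. The locked add of zero to a dummy stack slot
 * leaves memory unchanged, but any LOCK-prefixed instruction acts as a
 * full fence on x86 and is generally cheaper than an explicit MFENCE.
 */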
static inline void
rte_smp_mb(void)
{
	asm volatile("lock addl $0, -128(%%rsp)" ::: "memory");
}

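/*
 * Atomic compare-and-set: if *dst equals exp, atomically set *dst to
 * src. Returns non-zero on success, zero if *dst did not match exp
 * (cmpxchg followed by sete).
 */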
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile("lock ; "
		     "cmpxchgl %[src], %[dst];"
		     "sete %[res];"
		     : [res] "=a" (res),	/* output */
		       [dst] "=m" (*dst)
		     : [src] "r" (src),		/* input */
		       "a" (exp),
		       "m" (*dst)
		     : "memory");		/* clobber list */
	return res;
}

static inline uint32_t
vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
		  const void *src0, uint32_t cplen)
{
	uint8_t *br_data = tbr->vbr->data;
	uint32_t br_dsize = tbr->dsize;
	const uint8_t *src = src0;

	/* XXX use double mapping like Linux kernel? */
	if (cplen > br_dsize - windex) {
		uint32_t fraglen = br_dsize - windex;

		/* Wrap-around detected */
		memcpy(br_data + windex, src, fraglen);
		memcpy(br_data, src + fraglen, cplen - fraglen);
	} else {
		memcpy(br_data + windex, src, cplen);
	}

	return vmbus_br_idxinc(windex, cplen, br_dsize);
}

/*
 * Write a scattered channel packet to the TX bufring.
 *
 * The ring offset of this channel packet is written as a 64-bit value
 * immediately after the packet itself.
 *
 * The write goes through three stages:
 *  1. Reserve space in the ring buffer for the new data.
 *     Writer atomically advances the private write index (tbr->windex).
 *  2. Copy the new data into the ring.
 *  3. Update the host-visible write index (vbr->windex), telling the
 *     host how far it may read.
 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/*
		 * If there is not enough space in the ring, tell the
		 * caller. Note the '<=': one byte must stay free, or a
		 * full ring would look identical to an empty one.
		 */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved */
	windex = old_windex;
	for (i = 0; i < iovlen; i++)
		windex = vmbus_txbr_copyto(tbr, windex,
					   iov[i].iov_base, iov[i].iov_len);

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The reserved region should match the region actually used */
	if (windex != next_windex)
		return -EINVAL;

	/* Ensure that data is available before updating host index */
	rte_compiler_barrier();

	/*
	 * Check in our reservation: wait for our turn to update the
	 * host-visible write index.
	 */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		_mm_pause();

	return 0;
}

int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
			uint32_t dlen, uint32_t flags)
{
	struct vmbus_chanpkt pkt;
	unsigned int pktlen, pad_pktlen;
	const uint32_t hlen = sizeof(pkt);
	uint64_t pad = 0;
	struct iovec iov[3];
	int error;

	pktlen = hlen + dlen;
	pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));

	pkt.hdr.type = type;
	pkt.hdr.flags = flags;
	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.xactid = VMBUS_RQST_ERROR;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = vmbus_txbr_write(txbr, iov, 3);

	return error;
}
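
/*
 * Example (illustrative sketch): send an inband request on a TX bufring
 * set up earlier via vmbus_br_setup(). VMBUS_CHANPKT_TYPE_INBAND is
 * assumed to come from vmbus_bufring.h; 'req' and the flags value 0
 * (no completion requested) are assumptions for illustration.
 *
 *	struct my_request req = { .op = 1 };	// hypothetical payload
 *	int ret;
 *
 *	ret = rte_vmbus_chan_send(&txbr, VMBUS_CHANPKT_TYPE_INBAND,
 *				  &req, sizeof(req), 0);
 *	if (ret == -EAGAIN)
 *		;	// ring full: retry once the host has drained it
 */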

static inline uint32_t
vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
		    void *dst0, size_t cplen)
{
	const uint8_t *br_data = rbr->vbr->data;
	uint32_t br_dsize = rbr->dsize;
	uint8_t *dst = dst0;

	if (cplen > br_dsize - rindex) {
		uint32_t fraglen = br_dsize - rindex;

		/* Wrap-around detected. */
		memcpy(dst, br_data + rindex, fraglen);
		memcpy(dst + fraglen, br_data, cplen - fraglen);
	} else {
		memcpy(dst, br_data + rindex, cplen);
	}

	return vmbus_br_idxinc(rindex, cplen, br_dsize);
}

/* Copy data from the receive ring without changing the read index */
static int
vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
{
	uint32_t avail;

	/*
	 * At least the requested data plus the trailing 64-bit channel
	 * packet offset must be available.
	 */
	avail = vmbus_br_availread(rbr);
	if (avail < dlen + sizeof(uint64_t))
		return -EAGAIN;

	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
	return 0;
}

/*
 * Copy data from the receive ring and advance the read index.
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
 */
static int
vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
{
	struct vmbus_bufring *vbr = rbr->vbr;
	uint32_t br_dsize = rbr->dsize;
	uint32_t rindex;

	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
		return -EAGAIN;

	/* Record where the host was when we started the read (for debug) */
	rbr->windex = rbr->vbr->windex;

	/*
	 * Copy channel packet from RX bufring.
	 */
	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

	/*
	 * Discard this channel packet's 64-bit offset, which is useless
	 * to the guest-side reader.
	 */
	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);

	/* Update the read index _after_ the channel packet is fetched. */
	rte_compiler_barrier();

	vbr->rindex = rindex;

	return 0;
}

int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
			    void *data, uint32_t *len)
{
	struct vmbus_chanpkt_hdr pkt;
	uint32_t dlen, bufferlen = *len;
	int error;

	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
	if (error)
		return error;

	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
		/* XXX: this channel is effectively dead. */
		return -EIO;

	if (unlikely(pkt.hlen > pkt.tlen))
		return -EIO;

	/* Lengths are in quadwords (8-byte units) */
	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
	*len = dlen;

	/* If the caller's buffer is not large enough */
	if (unlikely(dlen > bufferlen))
		return -ENOBUFS;

	/* Read the whole packet, header included (skip == 0) */
	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
	if (error)
		return error;

	/* Return the bytes consumed from the ring: packet plus 64-bit offset */
	return dlen + sizeof(uint64_t);
}
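
/*
 * Example (illustrative sketch): poll the RX bufring. On success the
 * return value is the number of bytes consumed from the ring (the
 * padded packet plus its trailing 64-bit offset), and *len holds the
 * padded packet length copied into 'buf'. The buffer size and field
 * use below are assumptions for illustration.
 *
 *	uint8_t buf[4096];
 *	uint32_t len = sizeof(buf);
 *	int ret = rte_vmbus_chan_recv_raw(&rxbr, buf, &len);
 *
 *	if (ret > 0) {
 *		struct vmbus_chanpkt_hdr *hdr = (void *)buf;
 *		uint8_t *payload = buf + (hdr->hlen << VMBUS_CHANPKT_SIZE_SHIFT);
 *		// 'payload' points past the per-packet header
 *	} else if (ret == -EAGAIN) {
 *		// nothing pending from the host
 *	}
 */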