Diffstat (limited to 'vhost_user.c')
-rw-r--r--	vhost_user.c	1155
1 file changed, 535 insertions, 620 deletions
diff --git a/vhost_user.c b/vhost_user.c
index 9cc07c8..51c90db 100644
--- a/vhost_user.c
+++ b/vhost_user.c
@@ -1,6 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-
-/* some parts from QEMU subprojects/libvhost-user/libvhost-user.c */
+/*
+ * vhost-user API, command management and virtio interface
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ *
+ * Some parts from QEMU subprojects/libvhost-user/libvhost-user.c
+ * licensed under the following terms:
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Anthony Liguori <aliguori@us.ibm.com>
+ *  Marc-André Lureau <mlureau@redhat.com>
+ *  Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
 
 #include <errno.h>
 #include <fcntl.h>
@@ -25,91 +43,112 @@
 #include "passt.h"
 #include "tap.h"
 #include "vhost_user.h"
+#include "pcap.h"
 
+/* vhost-user version we are compatible with */
 #define VHOST_USER_VERSION 1
 
-static unsigned char buffer[65536][VHOST_USER_MAX_QUEUES];
+static struct vu_dev vdev_storage;
 
+/**
+ * vu_print_capabilities() - print vhost-user capabilities
+ *			     this is part of the vhost-user backend
+ *			     convention.
+ */
 void vu_print_capabilities(void)
 {
-	printf("{\n");
-	printf("  \"type\": \"net\"\n");
-	printf("}\n");
+	info("{");
+	info("  \"type\": \"net\"");
+	info("}");
 	exit(EXIT_SUCCESS);
 }
 
-static const char *
-vu_request_to_string(unsigned int req)
+/**
+ * vu_request_to_string() - convert a vhost-user request number to its name
+ * @req:	request number
+ *
+ * Return: the name of request number
+ */
+static const char *vu_request_to_string(unsigned int req)
 {
+	if (req < VHOST_USER_MAX) {
 #define REQ(req) [req] = #req
-	static const char *vu_request_str[] = {
-		REQ(VHOST_USER_NONE),
-		REQ(VHOST_USER_GET_FEATURES),
-		REQ(VHOST_USER_SET_FEATURES),
-		REQ(VHOST_USER_SET_OWNER),
-		REQ(VHOST_USER_RESET_OWNER),
-		REQ(VHOST_USER_SET_MEM_TABLE),
-		REQ(VHOST_USER_SET_LOG_BASE),
-		REQ(VHOST_USER_SET_LOG_FD),
-		REQ(VHOST_USER_SET_VRING_NUM),
-		REQ(VHOST_USER_SET_VRING_ADDR),
-		REQ(VHOST_USER_SET_VRING_BASE),
-		REQ(VHOST_USER_GET_VRING_BASE),
-		REQ(VHOST_USER_SET_VRING_KICK),
-		REQ(VHOST_USER_SET_VRING_CALL),
-		REQ(VHOST_USER_SET_VRING_ERR),
-		REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
-		REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
-		REQ(VHOST_USER_GET_QUEUE_NUM),
-		REQ(VHOST_USER_SET_VRING_ENABLE),
-		REQ(VHOST_USER_SEND_RARP),
-		REQ(VHOST_USER_NET_SET_MTU),
-		REQ(VHOST_USER_SET_BACKEND_REQ_FD),
-		REQ(VHOST_USER_IOTLB_MSG),
-		REQ(VHOST_USER_SET_VRING_ENDIAN),
-		REQ(VHOST_USER_GET_CONFIG),
-		REQ(VHOST_USER_SET_CONFIG),
-		REQ(VHOST_USER_POSTCOPY_ADVISE),
-		REQ(VHOST_USER_POSTCOPY_LISTEN),
-		REQ(VHOST_USER_POSTCOPY_END),
-		REQ(VHOST_USER_GET_INFLIGHT_FD),
-		REQ(VHOST_USER_SET_INFLIGHT_FD),
-		REQ(VHOST_USER_GPU_SET_SOCKET),
-		REQ(VHOST_USER_VRING_KICK),
-		REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
-		REQ(VHOST_USER_ADD_MEM_REG),
-		REQ(VHOST_USER_REM_MEM_REG),
-		REQ(VHOST_USER_MAX),
-	};
+		static const char * const vu_request_str[VHOST_USER_MAX] = {
+			REQ(VHOST_USER_NONE),
+			REQ(VHOST_USER_GET_FEATURES),
+			REQ(VHOST_USER_SET_FEATURES),
+			REQ(VHOST_USER_SET_OWNER),
+			REQ(VHOST_USER_RESET_OWNER),
+			REQ(VHOST_USER_SET_MEM_TABLE),
+			REQ(VHOST_USER_SET_LOG_BASE),
+			REQ(VHOST_USER_SET_LOG_FD),
+			REQ(VHOST_USER_SET_VRING_NUM),
+			REQ(VHOST_USER_SET_VRING_ADDR),
+			REQ(VHOST_USER_SET_VRING_BASE),
+			REQ(VHOST_USER_GET_VRING_BASE),
+			REQ(VHOST_USER_SET_VRING_KICK),
+			REQ(VHOST_USER_SET_VRING_CALL),
+			REQ(VHOST_USER_SET_VRING_ERR),
+			REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
+			REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
+			REQ(VHOST_USER_GET_QUEUE_NUM),
+			REQ(VHOST_USER_SET_VRING_ENABLE),
+			REQ(VHOST_USER_SEND_RARP),
+			REQ(VHOST_USER_NET_SET_MTU),
+			REQ(VHOST_USER_SET_BACKEND_REQ_FD),
+			REQ(VHOST_USER_IOTLB_MSG),
+			REQ(VHOST_USER_SET_VRING_ENDIAN),
+			REQ(VHOST_USER_GET_CONFIG),
+			REQ(VHOST_USER_SET_CONFIG),
+			REQ(VHOST_USER_POSTCOPY_ADVISE),
+			REQ(VHOST_USER_POSTCOPY_LISTEN),
+			REQ(VHOST_USER_POSTCOPY_END),
+			REQ(VHOST_USER_GET_INFLIGHT_FD),
+			REQ(VHOST_USER_SET_INFLIGHT_FD),
+			REQ(VHOST_USER_GPU_SET_SOCKET),
+			REQ(VHOST_USER_VRING_KICK),
+			REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
+			REQ(VHOST_USER_ADD_MEM_REG),
+			REQ(VHOST_USER_REM_MEM_REG),
+		};
 #undef REQ
-
-	if (req < VHOST_USER_MAX) {
 		return vu_request_str[req];
-	} else {
-		return "unknown";
 	}
+
+	return "unknown";
 }
 
-/* Translate qemu virtual address to our virtual address.  */
-static void *qva_to_va(VuDev *dev, uint64_t qemu_addr)
+/**
+ * qva_to_va() -  Translate front-end (QEMU) virtual address to our virtual
+ *		  address
+ * @dev:		vhost-user device
+ * @qemu_addr:		front-end userspace address
+ *
+ * Return: the memory address in our process virtual address space.
+ */
+static void *qva_to_va(struct vu_dev *dev, uint64_t qemu_addr)
 {
 	unsigned int i;
 
 	/* Find matching memory region. */
 	for (i = 0; i < dev->nregions; i++) {
-		VuDevRegion *r = &dev->regions[i];
+		const struct vu_dev_region *r = &dev->regions[i];
 
 		if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
-			return (void *)(uintptr_t)
-				(qemu_addr - r->qva + r->mmap_addr + r->mmap_offset);
+			/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+			return (void *)(qemu_addr - r->qva + r->mmap_addr +
+					r->mmap_offset);
 		}
 	}
 
 	return NULL;
 }
 
-static void
-vmsg_close_fds(VhostUserMsg *vmsg)
+/**
+ * vmsg_close_fds() - Close all file descriptors of a given message
+ * @vmsg:	vhost-user message with the list of the file descriptors
+ */
+static void vmsg_close_fds(const struct vhost_user_msg *vmsg)
 {
 	int i;
 
@@ -117,15 +156,24 @@ vmsg_close_fds(VhostUserMsg *vmsg)
 		close(vmsg->fds[i]);
 }
 
-static void vu_remove_watch(VuDev *vdev, int fd)
+/**
+ * vu_remove_watch() - Remove a file descriptor from our passt epoll
+ *		       file descriptor
+ * @vdev:	vhost-user device
+ * @fd:		file descriptor to remove
+ */
+static void vu_remove_watch(const struct vu_dev *vdev, int fd)
 {
-	struct ctx *c = (struct ctx *) ((char *)vdev - offsetof(struct ctx, vdev));
-
-	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, fd, NULL);
+	epoll_ctl(vdev->context->epollfd, EPOLL_CTL_DEL, fd, NULL);
 }
 
-/* Set reply payload.u64 and clear request flags and fd_num */
-static void vmsg_set_reply_u64(struct VhostUserMsg *vmsg, uint64_t val)
+/**
+ * vmsg_set_reply_u64() - Set reply payload.u64 and clear request flags
+ *			  and fd_num
+ * @vmsg:	vhost-user message
+ * @val:	64-bit value to reply
+ */
+static void vmsg_set_reply_u64(struct vhost_user_msg *vmsg, uint64_t val)
 {
 	vmsg->hdr.flags = 0; /* defaults will be set by vu_send_reply() */
 	vmsg->hdr.size = sizeof(vmsg->payload.u64);
@@ -133,7 +181,16 @@ static void vmsg_set_reply_u64(struct VhostUserMsg *vmsg, uint64_t val)
 	vmsg->fd_num = 0;
 }
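The qva_to_va() translation above is the core of the shared-memory scheme: the front-end sends its own userspace addresses, and the back-end rebases them into its mappings. A standalone sketch of the same arithmetic, with a simplified region type and made-up field contents (not the passt structures themselves):

/* Sketch of the qva_to_va() translation over an array of regions;
 * field names mirror the diff, values are hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

struct region {
	uint64_t qva;		/* front-end (QEMU) userspace address */
	uint64_t size;		/* region size in bytes */
	uint64_t mmap_addr;	/* where we mapped it in our process */
	uint64_t mmap_offset;	/* offset of the usable area in the mapping */
};

static void *translate(const struct region *r, size_t n, uint64_t qemu_addr)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (qemu_addr >= r[i].qva && qemu_addr < r[i].qva + r[i].size)
			return (void *)(uintptr_t)(qemu_addr - r[i].qva +
						   r[i].mmap_addr +
						   r[i].mmap_offset);
	}

	return NULL;	/* not backed by any shared region */
}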
-static ssize_t vu_message_read_default(VuDev *dev, int conn_fd, struct VhostUserMsg *vmsg)
+/**
+ * vu_message_read_default() - Read incoming vhost-user message from the
+ *			       front-end
+ * @conn_fd:	vhost-user command socket
+ * @vmsg:	vhost-user message
+ *
+ * Return: 0 if recvmsg() has been interrupted or if there's no data to read,
+ *	   1 if a message has been received
+ */
+static int vu_message_read_default(int conn_fd, struct vhost_user_msg *vmsg)
 {
 	char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS *
 		     sizeof(int))] = { 0 };
@@ -147,16 +204,14 @@ static ssize_t vu_message_read_default(VuDev *dev, int conn_fd, struct VhostUser
 		.msg_control = control,
 		.msg_controllen = sizeof(control),
 	};
-	size_t fd_size;
-	struct cmsghdr *cmsg;
 	ssize_t ret, sz_payload;
+	struct cmsghdr *cmsg;
 
 	ret = recvmsg(conn_fd, &msg, MSG_DONTWAIT);
 	if (ret < 0) {
 		if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
 			return 0;
-		vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
-		goto out;
+		die_perror("vhost-user message receive (recvmsg)");
 	}
 
 	vmsg->fd_num = 0;
@@ -164,7 +219,11 @@ static ssize_t vu_message_read_default(VuDev *dev, int conn_fd, struct VhostUser
 	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 		if (cmsg->cmsg_level == SOL_SOCKET &&
 		    cmsg->cmsg_type == SCM_RIGHTS) {
+			size_t fd_size;
+
+			ASSERT(cmsg->cmsg_len >= CMSG_LEN(0));
 			fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+			ASSERT(fd_size <= sizeof(vmsg->fds));
 			vmsg->fd_num = fd_size / sizeof(int);
 			memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
 			break;
@@ -173,99 +232,106 @@ static ssize_t vu_message_read_default(VuDev *dev, int conn_fd, struct VhostUser
 	sz_payload = vmsg->hdr.size;
 	if ((size_t)sz_payload > sizeof(vmsg->payload)) {
-		vu_panic(dev,
-			 "Error: too big message request: %d, size: vmsg->size: %zd, "
+		die("vhost-user message request too big: %d,"
+		    " size: vmsg->size: %zd, "
 		    "while sizeof(vmsg->payload) = %zu",
 		    vmsg->hdr.request, sz_payload, sizeof(vmsg->payload));
-		goto out;
 	}
 
 	if (sz_payload) {
-		do {
+		do
 			ret = recv(conn_fd, &vmsg->payload, sz_payload, 0);
-		} while (ret < 0 && (errno == EINTR || errno == EAGAIN));
+		while (ret < 0 && errno == EINTR);
 
-		if (ret < sz_payload) {
-			vu_panic(dev, "Error while reading: %s", strerror(errno));
-			goto out;
-		}
+		if (ret < 0)
+			die_perror("vhost-user message receive");
+
+		if (ret == 0)
+			die("EOF on vhost-user message receive");
+
+		if (ret < sz_payload)
+			die("Short-read on vhost-user message receive");
 	}
 
 	return 1;
-out:
-	vmsg_close_fds(vmsg);
-
-	return -ECONNRESET;
 }
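The two-step read above (header via recvmsg(), then exactly hdr.size payload bytes) follows the vhost-user wire format: every message starts with a fixed 12-byte header, and VHOST_USER_HDR_SIZE is the size of that header. A sketch of the layout, abbreviating the payload union:

/* Wire layout of a vhost-user message header, per the vhost-user
 * specification; the variable-size payload follows immediately.
 */
#include <stdint.h>

struct vhost_user_header {
	uint32_t request;	/* VHOST_USER_* request number */
	uint32_t flags;		/* version, REPLY and NEED_REPLY bits */
	uint32_t size;		/* payload size following the header */
} __attribute__((__packed__));

File descriptors (kick/call eventfds, memory region fds) never travel in the payload: they ride as SCM_RIGHTS ancillary data on the same recvmsg(), which is why the function walks the control-message headers before reading the payload.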
-static int vu_message_write(VuDev *dev, int conn_fd, struct VhostUserMsg *vmsg)
+/**
+ * vu_message_write() - Send a message to the front-end
+ * @conn_fd:	vhost-user command socket
+ * @vmsg:	vhost-user message
+ *
+ * #syscalls:vu sendmsg
+ */
+static void vu_message_write(int conn_fd, struct vhost_user_msg *vmsg)
 {
-	int rc;
-	uint8_t *p = (uint8_t *)vmsg;
 	char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS *
 		     sizeof(int))] = { 0 };
 	struct iovec iov = {
 		.iov_base = (char *)vmsg,
-		.iov_len = VHOST_USER_HDR_SIZE,
+		.iov_len = VHOST_USER_HDR_SIZE + vmsg->hdr.size,
 	};
 	struct msghdr msg = {
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
 		.msg_control = control,
 	};
-	struct cmsghdr *cmsg;
+	int rc;
 
-	memset(control, 0, sizeof(control));
-	assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
+	ASSERT(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
 	if (vmsg->fd_num > 0) {
 		size_t fdsize = vmsg->fd_num * sizeof(int);
+		struct cmsghdr *cmsg;
+
 		msg.msg_controllen = CMSG_SPACE(fdsize);
 		cmsg = CMSG_FIRSTHDR(&msg);
 		cmsg->cmsg_len = CMSG_LEN(fdsize);
 		cmsg->cmsg_level = SOL_SOCKET;
 		cmsg->cmsg_type = SCM_RIGHTS;
 		memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
-	} else {
-		msg.msg_controllen = 0;
 	}
 
-	do {
+	do
 		rc = sendmsg(conn_fd, &msg, 0);
-	} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
-
-	if (vmsg->hdr.size) {
-		do {
-			if (vmsg->data) {
-				rc = write(conn_fd, vmsg->data, vmsg->hdr.size);
-			} else {
-				rc = write(conn_fd, p + VHOST_USER_HDR_SIZE,
-					   vmsg->hdr.size);
-			}
-		} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
-	}
+	while (rc < 0 && errno == EINTR);
 
-	if (rc <= 0) {
-		vu_panic(dev, "Error while writing: %s", strerror(errno));
-		return false;
-	}
+	if (rc < 0)
+		die_perror("vhost-user message send");
 
-	return true;
+	if ((uint32_t)rc < VHOST_USER_HDR_SIZE + vmsg->hdr.size)
+		die("EOF on vhost-user message send");
 }
 
-static int vu_send_reply(VuDev *dev, int conn_fd, struct VhostUserMsg *msg)
+/**
+ * vu_send_reply() - Update message flags and send it to front-end
+ * @conn_fd:	vhost-user command socket
+ * @vmsg:	vhost-user message
+ */
+static void vu_send_reply(int conn_fd, struct vhost_user_msg *msg)
 {
 	msg->hdr.flags &= ~VHOST_USER_VERSION_MASK;
 	msg->hdr.flags |= VHOST_USER_VERSION;
 	msg->hdr.flags |= VHOST_USER_REPLY_MASK;
 
-	return vu_message_write(dev, conn_fd, msg);
+	vu_message_write(conn_fd, msg);
 }
 
-static bool vu_get_features_exec(struct VhostUserMsg *msg)
+/**
+ * vu_get_features_exec() - Provide back-end features bitmask to front-end
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: True as a reply is requested
+ */
+static bool vu_get_features_exec(struct vu_dev *vdev,
+				 struct vhost_user_msg *msg)
 {
 	uint64_t features =
 		1ULL << VIRTIO_F_VERSION_1 |
 		1ULL << VIRTIO_NET_F_MRG_RXBUF |
 		1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
 
+	(void)vdev;
+
 	vmsg_set_reply_u64(msg, features);
 
 	debug("Sending back to guest u64: 0x%016"PRIx64, msg->payload.u64);
@@ -273,54 +339,69 @@ static bool vu_get_features_exec(struct VhostUserMsg *msg)
 	return true;
 }
 
-static void
-vu_set_enable_all_rings(VuDev *vdev, bool enabled)
+/**
+ * vu_set_enable_all_rings() - Enable/disable all the virtqueues
+ * @vdev:	vhost-user device
+ * @enable:	New virtqueues state
+ */
+static void vu_set_enable_all_rings(struct vu_dev *vdev, bool enable)
 {
 	uint16_t i;
 
-	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
-		vdev->vq[i].enable = enabled;
-	}
+	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++)
+		vdev->vq[i].enable = enable;
 }
 
-static bool
-vu_set_features_exec(VuDev *vdev, struct VhostUserMsg *msg)
+/**
+ * vu_set_features_exec() - Enable features of the back-end
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_features_exec(struct vu_dev *vdev,
+				 struct vhost_user_msg *msg)
 {
 	debug("u64: 0x%016"PRIx64, msg->payload.u64);
 
 	vdev->features = msg->payload.u64;
-	if (!vu_has_feature(vdev, VIRTIO_F_VERSION_1)) {
-		/*
-		 * We only support devices conforming to VIRTIO 1.0 or
-		 * later
-		 */
-		vu_panic(vdev, "virtio legacy devices aren't supported by passt");
-		return false;
-	}
+	/* We only support devices conforming to VIRTIO 1.0 or
+	 * later
+	 */
+	if (!vu_has_feature(vdev, VIRTIO_F_VERSION_1))
		die("virtio legacy devices aren't supported by passt");
 
-	if (!vu_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
+	if (!vu_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES))
 		vu_set_enable_all_rings(vdev, true);
-	}
-
-	/* virtio-net features */
-
-	if (vu_has_feature(vdev, VIRTIO_F_VERSION_1) ||
-	    vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
-		vdev->hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	} else {
-		vdev->hdrlen = sizeof(struct virtio_net_hdr);
-	}
 
 	return false;
 }
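GET_FEATURES/SET_FEATURES above form the first exchange of a session. A hypothetical front-end snippet querying the feature bitmap over the control socket; the socket path, the lack of error handling and the inline message struct are all illustrative (VHOST_USER_GET_FEATURES is request 1 and version flag 0x1 per the vhost-user specification):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/socket.h>
#include <sys/un.h>

int query_features(const char *path)	/* path is an assumption */
{
	struct { uint32_t request, flags, size; uint64_t u64; }
		__attribute__((__packed__)) msg = {
		.request = 1,		/* VHOST_USER_GET_FEATURES */
		.flags = 0x1,		/* protocol version 1 */
		.size = 0,		/* no payload on the request */
	};
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
	connect(fd, (struct sockaddr *)&addr, sizeof(addr));

	send(fd, &msg, 12, 0);		/* header only */
	recv(fd, &msg, sizeof(msg), 0);	/* reply carries payload.u64 */
	printf("features: 0x%016llx\n", (unsigned long long)msg.u64);

	return close(fd);
}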
-static bool
-vu_set_owner_exec(void)
+/**
+ * vu_set_owner_exec() - Session start flag, do nothing in our case
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_owner_exec(struct vu_dev *vdev,
+			      struct vhost_user_msg *msg)
 {
+	(void)vdev;
+	(void)msg;
+
 	return false;
 }
 
-static bool map_ring(VuDev *vdev, VuVirtq *vq)
+/**
+ * map_ring() - Convert ring front-end (QEMU) addresses to our process
+ * 		virtual address space.
+ * @vdev:	vhost-user device
+ * @vq:		Virtqueue
+ *
+ * Return: True if ring cannot be mapped to our address space
+ */
+static bool map_ring(struct vu_dev *vdev, struct vu_virtq *vq)
 {
 	vq->vring.desc = qva_to_va(vdev, vq->vra.desc_user_addr);
 	vq->vring.used = qva_to_va(vdev, vq->vra.used_user_addr);
@@ -334,51 +415,39 @@ static bool map_ring(VuDev *vdev, VuVirtq *vq)
 	return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
 }
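map_ring() translates three addresses because a split virtqueue is three separately-laid-out areas, each of which the front-end may place at a different guest address. Their layout per the VIRTIO 1.x specification, for reference:

/* Split-virtqueue areas per VIRTIO 1.x: this is why SET_VRING_ADDR
 * carries desc, avail and used addresses that map_ring() translates
 * one by one.
 */
#include <stdint.h>

struct vring_desc {		/* descriptor table: 16 bytes * num */
	uint64_t addr;
	uint32_t len;
	uint16_t flags;
	uint16_t next;
};

struct vring_avail {		/* driver (guest) -> device */
	uint16_t flags;
	uint16_t idx;
	uint16_t ring[];	/* num entries */
};

struct vring_used_elem {
	uint32_t id;
	uint32_t len;
};

struct vring_used {		/* device -> driver (guest) */
	uint16_t flags;
	uint16_t idx;
	struct vring_used_elem ring[];	/* num entries */
};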
-int vu_packet_check_range(void *buf, size_t offset, size_t len, const char *start,
-			  const char *func, int line)
-{
-	VuDevRegion *dev_region;
-
-	for (dev_region = buf; dev_region->mmap_addr; dev_region++) {
-		if ((char *)dev_region->mmap_addr <= start &&
-		    start + offset + len < (char *)dev_region->mmap_addr +
-					   dev_region->mmap_offset +
-					   dev_region->size)
-			return 0;
-	}
-	if (func) {
-		trace("cannot find region, %s:%i", func, line);
-	}
-
-	return -1;
-}
-
-/*
- * #syscalls:passt mmap munmap
+/**
+ * vu_set_mem_table_exec() - Sets the memory map regions to be able to
+ *			     translate the vring addresses.
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ *
+ * #syscalls:vu mmap munmap
  */
-
-static bool vu_set_mem_table_exec(VuDev *vdev,
-				  struct VhostUserMsg *msg)
+static bool vu_set_mem_table_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
 {
+	struct vhost_user_memory m = msg->payload.memory, *memory = &m;
 	unsigned int i;
-	struct VhostUserMemory m = msg->payload.memory, *memory = &m;
 
 	for (i = 0; i < vdev->nregions; i++) {
-		VuDevRegion *r = &vdev->regions[i];
-		void *m = (void *) (uintptr_t) r->mmap_addr;
+		const struct vu_dev_region *r = &vdev->regions[i];
 
-		if (m)
-			munmap(m, r->size + r->mmap_offset);
+		if (r->mmap_addr) {
+			/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+			munmap((void *)r->mmap_addr, r->size + r->mmap_offset);
+		}
 	}
 
 	vdev->nregions = memory->nregions;
-	debug("Nregions: %u", memory->nregions);
+	debug("vhost-user nregions: %u", memory->nregions);
 
 	for (i = 0; i < vdev->nregions; i++) {
+		struct vhost_user_memory_region *msg_region = &memory->regions[i];
+		struct vu_dev_region *dev_region = &vdev->regions[i];
 		void *mmap_addr;
-		VhostUserMemory_region *msg_region = &memory->regions[i];
-		VuDevRegion *dev_region = &vdev->regions[i];
 
-		debug("Region %d", i);
+		debug("vhost-user region %d", i);
 		debug("    guest_phys_addr: 0x%016"PRIx64,
 		      msg_region->guest_phys_addr);
 		debug("    memory_size:     0x%016"PRIx64,
@@ -394,76 +463,95 @@ static bool vu_set_mem_table_exec(VuDev *vdev,
 		dev_region->mmap_offset = msg_region->mmap_offset;
 
 		/* We don't use offset argument of mmap() since the
-		 * mapped address has to be page aligned, and we use huge
-		 * pages.  */
+		 * mapped address has to be page aligned.
+		 */
 		mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
-				 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
-				 msg->fds[i], 0);
-
-		if (mmap_addr == MAP_FAILED) {
-			vu_panic(vdev, "region mmap error: %s", strerror(errno));
-		} else {
-			dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
-			debug("    mmap_addr:       0x%016"PRIx64,
-			      dev_region->mmap_addr);
-		}
+				 PROT_READ | PROT_WRITE, MAP_SHARED |
+				 MAP_NORESERVE, msg->fds[i], 0);
+
+		if (mmap_addr == MAP_FAILED)
+			die_perror("vhost-user region mmap error");
+
+		dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+		debug("    mmap_addr:       0x%016"PRIx64,
+		      dev_region->mmap_addr);
 
 		close(msg->fds[i]);
 	}
 
 	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
 		if (vdev->vq[i].vring.desc) {
-			if (map_ring(vdev, &vdev->vq[i])) {
-				vu_panic(vdev, "remapping queue %d during setmemtable", i);
-			}
+			if (map_ring(vdev, &vdev->vq[i]))
+				die("remapping queue %d during setmemtable", i);
 		}
 	}
 
-	/* XXX */
+	/* As vu_packet_check_range() has no access to the number of
+	 * memory regions, mark the end of the array with mmap_addr = 0
+	 */
 	ASSERT(vdev->nregions < VHOST_USER_MAX_RAM_SLOTS - 1);
-	vdev->regions[vdev->nregions].mmap_addr = 0; /* mark EOF for vu_packet_check_range() */
+	vdev->regions[vdev->nregions].mmap_addr = 0;
 
-	tap_sock_update_buf(vdev->regions, 0);
+	tap_sock_update_pool(vdev->regions, 0);
 
 	return false;
 }
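The sentinel comment above implies a consumer that walks the region array until mmap_addr is zero. A minimal sketch of that consumer side, mirroring the range check that the removed vu_packet_check_range() performed, again with a simplified region type:

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

struct region {
	uint64_t mmap_addr, mmap_offset, size;
};

static bool range_is_mapped(const struct region *r, const char *p, size_t len)
{
	for (; r->mmap_addr; r++) {	/* stop at the sentinel entry */
		const char *base = (const char *)(uintptr_t)r->mmap_addr;

		if (p >= base && p + len <= base + r->mmap_offset + r->size)
			return true;
	}

	return false;	/* buffer not inside any guest memory region */
}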
-static bool vu_set_vring_num_exec(VuDev *vdev,
-				  struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_num_exec() - Set the size of the queue (vring size)
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_num_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
 {
-	unsigned int index = msg->payload.state.index;
+	unsigned int idx = msg->payload.state.index;
 	unsigned int num = msg->payload.state.num;
 
-	debug("State.index: %u", index);
+	debug("State.index: %u", idx);
 	debug("State.num:   %u", num);
-	vdev->vq[index].vring.num = num;
+	vdev->vq[idx].vring.num = num;
 
 	return false;
 }
 
-static bool vu_set_vring_addr_exec(VuDev *vdev,
-				   struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_addr_exec() - Set the addresses of the vring
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_addr_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
 {
-	struct vhost_vring_addr addr = msg->payload.addr, *vra = &addr;
-	unsigned int index = vra->index;
-	VuVirtq *vq = &vdev->vq[index];
+	/* We need to copy the payload to vhost_vring_addr structure
+	 * to access index because address of msg->payload.addr
+	 * can be unaligned as it is packed.
+	 */
+	struct vhost_vring_addr addr = msg->payload.addr;
+	struct vu_virtq *vq = &vdev->vq[addr.index];
 
 	debug("vhost_vring_addr:");
-	debug("    index:  %d", vra->index);
-	debug("    flags:  %d", vra->flags);
-	debug("    desc_user_addr:   0x%016" PRIx64, (uint64_t)vra->desc_user_addr);
-	debug("    used_user_addr:   0x%016" PRIx64, (uint64_t)vra->used_user_addr);
-	debug("    avail_user_addr:  0x%016" PRIx64, (uint64_t)vra->avail_user_addr);
-	debug("    log_guest_addr:   0x%016" PRIx64, (uint64_t)vra->log_guest_addr);
-
-	vq->vra = *vra;
-	vq->vring.flags = vra->flags;
-	vq->vring.log_guest_addr = vra->log_guest_addr;
-
-	if (map_ring(vdev, vq)) {
-		vu_panic(vdev, "Invalid vring_addr message");
-		return false;
-	}
+	debug("    index:  %d", addr.index);
+	debug("    flags:  %d", addr.flags);
+	debug("    desc_user_addr:   0x%016" PRIx64,
+	      (uint64_t)addr.desc_user_addr);
+	debug("    used_user_addr:   0x%016" PRIx64,
+	      (uint64_t)addr.used_user_addr);
+	debug("    avail_user_addr:  0x%016" PRIx64,
+	      (uint64_t)addr.avail_user_addr);
+	debug("    log_guest_addr:   0x%016" PRIx64,
+	      (uint64_t)addr.log_guest_addr);
+
+	vq->vra = msg->payload.addr;
+	vq->vring.flags = addr.flags;
+	vq->vring.log_guest_addr = addr.log_guest_addr;
+
+	if (map_ring(vdev, vq))
+		die("Invalid vring_addr message");
 
 	vq->used_idx = le16toh(vq->vring.used->idx);
 
@@ -474,392 +562,232 @@ static bool vu_set_vring_addr_exec(VuDev *vdev,
 	return false;
 }
 
-
-static bool vu_set_vring_base_exec(VuDev *vdev,
-				   struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_base_exec() - Sets the next index to use for descriptors
+ *			      in this vring
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_base_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
 {
-	unsigned int index = msg->payload.state.index;
+	unsigned int idx = msg->payload.state.index;
 	unsigned int num = msg->payload.state.num;
 
-	debug("State.index: %u", index);
+	debug("State.index: %u", idx);
 	debug("State.num:   %u", num);
-	vdev->vq[index].shadow_avail_idx = vdev->vq[index].last_avail_idx = num;
+	vdev->vq[idx].shadow_avail_idx = vdev->vq[idx].last_avail_idx = num;
 
 	return false;
 }
 
-static bool vu_get_vring_base_exec(VuDev *vdev,
-				   struct VhostUserMsg *msg)
+/**
+ * vu_get_vring_base_exec() - Stops the vring and returns the current
+ *			      descriptor index or indices
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: True as a reply is requested
+ */
+static bool vu_get_vring_base_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
 {
-	unsigned int index = msg->payload.state.index;
+	unsigned int idx = msg->payload.state.index;
 
-	debug("State.index: %u", index);
-	msg->payload.state.num = vdev->vq[index].last_avail_idx;
+	debug("State.index: %u", idx);
+	msg->payload.state.num = vdev->vq[idx].last_avail_idx;
 	msg->hdr.size = sizeof(msg->payload.state);
 
-	vdev->vq[index].started = false;
+	vdev->vq[idx].started = false;
 
-	if (vdev->vq[index].call_fd != -1) {
-		close(vdev->vq[index].call_fd);
-		vdev->vq[index].call_fd = -1;
+	if (vdev->vq[idx].call_fd != -1) {
+		close(vdev->vq[idx].call_fd);
+		vdev->vq[idx].call_fd = -1;
 	}
-	if (vdev->vq[index].kick_fd != -1) {
-		vu_remove_watch(vdev, vdev->vq[index].kick_fd);
-		close(vdev->vq[index].kick_fd);
-		vdev->vq[index].kick_fd = -1;
+	if (vdev->vq[idx].kick_fd != -1) {
+		vu_remove_watch(vdev, vdev->vq[idx].kick_fd);
+		close(vdev->vq[idx].kick_fd);
+		vdev->vq[idx].kick_fd = -1;
 	}
 
 	return true;
 }
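The "copy the payload first" comment in vu_set_vring_addr_exec() is a general C point worth spelling out: members of a packed structure can sit at unaligned offsets, and dereferencing them through a misaligned pointer is undefined behaviour (and a real fault on some architectures). Copying into an ordinary aligned object makes every later access safe:

#include <string.h>
#include <stdint.h>

struct payload {
	uint8_t  type;
	uint64_t value;		/* offset 1: unaligned when packed */
} __attribute__((__packed__));

static uint64_t read_value(const struct payload *p)
{
	uint64_t v;

	memcpy(&v, &p->value, sizeof(v));	/* alignment-safe copy */
	return v;
}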
-static void vu_set_watch(VuDev *vdev, int fd)
+/**
+ * vu_set_watch() - Add a file descriptor to the passt epoll file descriptor
+ * @vdev:	vhost-user device
+ * @idx:	queue index of the file descriptor to add
+ */
+static void vu_set_watch(const struct vu_dev *vdev, int idx)
 {
-	struct ctx *c = (struct ctx *) ((char *)vdev - offsetof(struct ctx, vdev));
-	union epoll_ref ref = { .type = EPOLL_TYPE_VHOST_KICK, .fd = fd };
+	union epoll_ref ref = {
+		.type = EPOLL_TYPE_VHOST_KICK,
+		.fd = vdev->vq[idx].kick_fd,
+		.queue = idx
+	};
 	struct epoll_event ev = { 0 };
 
 	ev.data.u64 = ref.u64;
 	ev.events = EPOLLIN;
-	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev);
-}
-
-int vu_send(const struct ctx *c, const void *buf, size_t size)
-{
-	VuDev *vdev = (VuDev *)&c->vdev;
-	size_t hdrlen = vdev->hdrlen;
-	VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
-	unsigned int indexes[VIRTQUEUE_MAX_SIZE];
-	size_t lens[VIRTQUEUE_MAX_SIZE];
-	size_t offset;
-	int i, j;
-	__virtio16 *num_buffers_ptr;
-
-	debug("vu_send size %zu hdrlen %zu", size, hdrlen);
-
-	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
-		err("Got packet, but no available descriptors on RX virtq.");
-		return 0;
-	}
-
-	offset = 0;
-	i = 0;
-	num_buffers_ptr = NULL;
-	while (offset < size) {
-		VuVirtqElement *elem;
-		size_t len;
-		int total;
-
-		total = 0;
-
-		if (i == VIRTQUEUE_MAX_SIZE) {
-			err("virtio-net unexpected long buffer chain");
-			goto err;
-		}
-
-		elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement),
-				    buffer[VHOST_USER_RX_QUEUE]);
-		if (!elem) {
-			if (!vdev->broken) {
-				eventfd_t kick_data;
-				ssize_t rc;
-				int status;
-
-				/* wait the kernel to put new entries in the queue */
-
-				status = fcntl(vq->kick_fd, F_GETFL);
-				if (status != -1) {
-					fcntl(vq->kick_fd, F_SETFL, status & ~O_NONBLOCK);
-					rc = eventfd_read(vq->kick_fd, &kick_data);
-					fcntl(vq->kick_fd, F_SETFL, status);
-					if (rc != -1)
-						continue;
-				}
-			}
-			if (i) {
-				err("virtio-net unexpected empty queue: "
-				    "i %d mergeable %d offset %zd, size %zd, "
-				    "features 0x%" PRIx64,
-				    i, vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF),
-				    offset, size, vdev->features);
-			}
-			offset = -1;
-			goto err;
-		}
-
-		if (elem->in_num < 1) {
-			err("virtio-net receive queue contains no in buffers");
-			vu_queue_detach_element(vdev, vq, elem->index, 0);
-			offset = -1;
-			goto err;
-		}
-
-		if (i == 0) {
-			struct virtio_net_hdr hdr = {
-				.flags = VIRTIO_NET_HDR_F_DATA_VALID,
-				.gso_type = VIRTIO_NET_HDR_GSO_NONE,
-			};
-
-			ASSERT(offset == 0);
-			ASSERT(elem->in_sg[0].iov_len >= hdrlen);
-
-			len = iov_from_buf(elem->in_sg, elem->in_num, 0, &hdr, sizeof hdr);
-
-			num_buffers_ptr = (__virtio16 *)((char *)elem->in_sg[0].iov_base +
-							 len);
-
-			total += hdrlen;
-		}
-
-		len = iov_from_buf(elem->in_sg, elem->in_num, total, (char *)buf + offset,
-				   size - offset);
-
-		total += len;
-		offset += len;
-
-		/* If buffers can't be merged, at this point we
-		 * must have consumed the complete packet.
-		 * Otherwise, drop it.
-		 */
-		if (!vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) && offset < size) {
-			vu_queue_unpop(vdev, vq, elem->index, total);
-			goto err;
-		}
-
-		indexes[i] = elem->index;
-		lens[i] = total;
-		i++;
-	}
-
-	if (num_buffers_ptr && vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
-		*num_buffers_ptr = htole16(i);
-	}
-
-	for (j = 0; j < i; j++) {
-		debug("filling total %zd idx %d", lens[j], j);
-		vu_queue_fill_by_index(vdev, vq, indexes[j], lens[j], j);
-	}
-
-	vu_queue_flush(vdev, vq, i);
-	vu_queue_notify(vdev, vq);
-
-	debug("sent %zu", offset);
-
-	return offset;
-err:
-	for (j = 0; j < i; j++) {
-		vu_queue_detach_element(vdev, vq, indexes[j], lens[j]);
-	}
-
-	return offset;
+	epoll_ctl(vdev->context->epollfd, EPOLL_CTL_ADD, ref.fd, &ev);
 }
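vu_set_watch() leans on the trick that epoll hands back a single 64-bit word per event, into which passt packs an event type, a file descriptor and now a queue index. A self-contained sketch of the idea; field widths and the type value here are illustrative, not passt's exact epoll_ref layout:

#include <stdint.h>
#include <sys/epoll.h>

union ev_ref {
	struct {
		uint32_t type:8;	/* e.g. EPOLL_TYPE_VHOST_KICK */
		uint32_t queue:8;	/* virtqueue index */
		int32_t fd;		/* kick eventfd */
	};
	uint64_t u64;
};

static int watch_kick(int epollfd, int kick_fd, unsigned int queue)
{
	union ev_ref ref = { .fd = kick_fd, .queue = queue, .type = 1 };
	struct epoll_event ev = { .events = EPOLLIN, .data.u64 = ref.u64 };

	return epoll_ctl(epollfd, EPOLL_CTL_ADD, kick_fd, &ev);
}

Carrying the queue index in the reference is what lets the new code drop the linear kick_fd search that vu_kick_cb() (removed below) had to do on every event.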
-size_t tap_send_frames_vu(const struct ctx *c, const struct iovec *iov, size_t n)
-{
-	size_t i;
-	int ret;
-
-	debug("tap_send_frames_vu n %zd", n);
-
-	for (i = 0; i < n; i++) {
-		ret = vu_send(c, iov[i].iov_base, iov[i].iov_len);
-		if (ret < 0)
-			break;
-	}
-	debug("count %zd", i);
-
-	return i;
-}
-
-static void vu_handle_tx(VuDev *vdev, int index)
-{
-	struct ctx *c = (struct ctx *) ((char *)vdev - offsetof(struct ctx, vdev));
-	VuVirtq *vq = &vdev->vq[index];
-	int hdrlen = vdev->hdrlen;
-	struct timespec now;
-	unsigned int indexes[VIRTQUEUE_MAX_SIZE];
-	int count;
-
-	if (index % 2 != VHOST_USER_TX_QUEUE) {
-		debug("index %d is not an TX queue", index);
-		return;
-	}
-
-	clock_gettime(CLOCK_MONOTONIC, &now);
-
-	pool_flush_all();
-
-	count = 0;
-	while (1) {
-		VuVirtqElement *elem;
-
-		ASSERT(index == VHOST_USER_TX_QUEUE);
-		elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer[index]);
-		if (!elem) {
-			break;
-		}
-
-		if (elem->out_num < 1) {
-			debug("virtio-net header not in first element");
-			break;
-		}
-		ASSERT(elem->out_num == 1);
-
-		packet_add_all(c, elem->out_sg[0].iov_len - hdrlen,
-			       (char *)elem->out_sg[0].iov_base + hdrlen);
-		indexes[count] = elem->index;
-		count++;
-	}
-
-	tap_handler_all(c, &now);
-
-	if (count) {
-		int i;
-
-		for (i = 0; i < count; i++)
-			vu_queue_fill_by_index(vdev, vq, indexes[i], 0, i);
-		vu_queue_flush(vdev, vq, count);
-		vu_queue_notify(vdev, vq);
-	}
-}
-
-void vu_kick_cb(struct ctx *c, union epoll_ref ref)
-{
-	VuDev *vdev = &c->vdev;
-	eventfd_t kick_data;
-	ssize_t rc;
-	int index;
-
-	for (index = 0; index < VHOST_USER_MAX_QUEUES; index++)
-		if (c->vdev.vq[index].kick_fd == ref.fd)
-			break;
-
-	if (index == VHOST_USER_MAX_QUEUES)
-		return;
-
-	rc = eventfd_read(ref.fd, &kick_data);
-	if (rc == -1) {
-		vu_panic(vdev, "kick eventfd_read(): %s", strerror(errno));
-		vu_remove_watch(vdev, ref.fd);
-	} else {
-		debug("Got kick_data: %016"PRIx64" idx:%d",
-		      kick_data, index);
-		if (index % 2 == VHOST_USER_TX_QUEUE)
-			vu_handle_tx(vdev, index);
-	}
-}
-
-static bool vu_check_queue_msg_file(VuDev *vdev, struct VhostUserMsg *msg)
+/**
+ * vu_check_queue_msg_file() - Check if a message is valid,
+ *			       close fds if NOFD bit is set
+ * @vmsg:	vhost-user message
+ */
+static void vu_check_queue_msg_file(struct vhost_user_msg *msg)
 {
-	int index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
-	if (index >= VHOST_USER_MAX_QUEUES) {
-		vmsg_close_fds(msg);
-		vu_panic(vdev, "Invalid queue index: %u", index);
-		return false;
-	}
+	if (idx >= VHOST_USER_MAX_QUEUES)
+		die("Invalid vhost-user queue index: %u", idx);
 
 	if (nofd) {
 		vmsg_close_fds(msg);
-		return true;
+		return;
 	}
 
-	if (msg->fd_num != 1) {
-		vmsg_close_fds(msg);
-		vu_panic(vdev, "Invalid fds in request: %d", msg->hdr.request);
-		return false;
-	}
-
-	return true;
+	if (msg->fd_num != 1)
+		die("Invalid fds in vhost-user request: %d", msg->hdr.request);
 }
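The kick descriptors handled above are eventfds: the front-end signals "new descriptors available" by incrementing a counter, and the back-end must read the counter to rearm the notification. The minimal handling idiom, assuming level-triggered epoll as in vu_set_watch():

#include <sys/eventfd.h>

static int drain_kick(int kick_fd)
{
	eventfd_t kicks;	/* kicks accumulated since the last read */

	if (eventfd_read(kick_fd, &kicks) < 0)
		return -1;

	/* ...process all available TX descriptors here: one read may
	 * stand for many kicks, so drain the whole ring, not one entry
	 */
	return 0;
}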
-static bool vu_set_vring_kick_exec(VuDev *vdev,
-				   struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_kick_exec() - Set the event file descriptor for adding buffers
+ *			      to the vring
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
 {
-	int index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
 	debug("u64: 0x%016"PRIx64, msg->payload.u64);
 
-	if (!vu_check_queue_msg_file(vdev, msg))
-		return false;
+	vu_check_queue_msg_file(msg);
 
-	if (vdev->vq[index].kick_fd != -1) {
-		vu_remove_watch(vdev, vdev->vq[index].kick_fd);
-		close(vdev->vq[index].kick_fd);
-		vdev->vq[index].kick_fd = -1;
+	if (vdev->vq[idx].kick_fd != -1) {
+		vu_remove_watch(vdev, vdev->vq[idx].kick_fd);
+		close(vdev->vq[idx].kick_fd);
+		vdev->vq[idx].kick_fd = -1;
 	}
 
-	vdev->vq[index].kick_fd = nofd ? -1 : msg->fds[0];
-	debug("Got kick_fd: %d for vq: %d", vdev->vq[index].kick_fd, index);
+	if (!nofd)
+		vdev->vq[idx].kick_fd = msg->fds[0];
 
-	vdev->vq[index].started = true;
+	debug("Got kick_fd: %d for vq: %d", vdev->vq[idx].kick_fd, idx);
 
-	if (vdev->vq[index].kick_fd != -1 && index % 2 == VHOST_USER_TX_QUEUE) {
-		vu_set_watch(vdev, vdev->vq[index].kick_fd);
+	vdev->vq[idx].started = true;
+
+	if (vdev->vq[idx].kick_fd != -1 && VHOST_USER_IS_QUEUE_TX(idx)) {
+		vu_set_watch(vdev, idx);
 		debug("Waiting for kicks on fd: %d for vq: %d",
-		      vdev->vq[index].kick_fd, index);
+		      vdev->vq[idx].kick_fd, idx);
 	}
 
 	return false;
 }
 
-static bool vu_set_vring_call_exec(VuDev *vdev,
-				   struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_call_exec() - Set the event file descriptor to signal when
+ *			      buffers are used
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_call_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
 {
-	int index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
 	debug("u64: 0x%016"PRIx64, msg->payload.u64);
 
-	if (!vu_check_queue_msg_file(vdev, msg))
-		return false;
+	vu_check_queue_msg_file(msg);
 
-	if (vdev->vq[index].call_fd != -1) {
-		close(vdev->vq[index].call_fd);
-		vdev->vq[index].call_fd = -1;
+	if (vdev->vq[idx].call_fd != -1) {
+		close(vdev->vq[idx].call_fd);
+		vdev->vq[idx].call_fd = -1;
 	}
 
-	vdev->vq[index].call_fd = nofd ? -1 : msg->fds[0];
+	if (!nofd)
+		vdev->vq[idx].call_fd = msg->fds[0];
 
 	/* in case of I/O hang after reconnecting */
-	if (vdev->vq[index].call_fd != -1) {
+	if (vdev->vq[idx].call_fd != -1)
 		eventfd_write(msg->fds[0], 1);
-	}
 
-	debug("Got call_fd: %d for vq: %d", vdev->vq[index].call_fd, index);
+	debug("Got call_fd: %d for vq: %d", vdev->vq[idx].call_fd, idx);
 
 	return false;
 }
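All three VRING_KICK/CALL/ERR messages share the payload.u64 encoding decoded above: the queue index sits in the low byte and bit 8 flags that no file descriptor accompanies the message (mask values per the vhost-user specification). Spelled out:

#include <stdbool.h>
#include <stdint.h>

#define VRING_IDX_MASK	0xffULL		/* VHOST_USER_VRING_IDX_MASK */
#define VRING_NOFD_MASK	(0x1ULL << 8)	/* VHOST_USER_VRING_NOFD_MASK */

static void decode_vring_u64(uint64_t u64, unsigned int *idx, bool *nofd)
{
	*idx = u64 & VRING_IDX_MASK;
	*nofd = u64 & VRING_NOFD_MASK;
}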
-static bool vu_set_vring_err_exec(VuDev *vdev,
-				  struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_err_exec() - Set the event file descriptor to signal when
+ *			     error occurs
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_err_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
 {
-	int index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
 	debug("u64: 0x%016"PRIx64, msg->payload.u64);
 
-	if (!vu_check_queue_msg_file(vdev, msg))
-		return false;
+	vu_check_queue_msg_file(msg);
 
-	if (vdev->vq[index].err_fd != -1) {
-		close(vdev->vq[index].err_fd);
-		vdev->vq[index].err_fd = -1;
+	if (vdev->vq[idx].err_fd != -1) {
+		close(vdev->vq[idx].err_fd);
+		vdev->vq[idx].err_fd = -1;
 	}
 
-	vdev->vq[index].err_fd = nofd ? -1 : msg->fds[0];
+	if (!nofd)
+		vdev->vq[idx].err_fd = msg->fds[0];
 
 	return false;
 }
 
-static bool vu_get_protocol_features_exec(struct VhostUserMsg *msg)
+/**
+ * vu_get_protocol_features_exec() - Provide the protocol (vhost-user) features
+ *				     to the front-end
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: True as a reply is requested
+ */
+static bool vu_get_protocol_features_exec(struct vu_dev *vdev,
+					  struct vhost_user_msg *msg)
 {
 	uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK;
 
+	(void)vdev;
+
 	vmsg_set_reply_u64(msg, features);
 
 	return true;
 }
 
-static bool vu_set_protocol_features_exec(VuDev *vdev, struct VhostUserMsg *msg)
+/**
+ * vu_set_protocol_features_exec() - Enable protocol (vhost-user) features
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_protocol_features_exec(struct vu_dev *vdev,
+					  struct vhost_user_msg *msg)
 {
 	uint64_t features = msg->payload.u64;
 
@@ -867,72 +795,80 @@ static bool vu_set_protocol_features_exec(VuDev *vdev, struct VhostUserMsg *msg)
 
 	vdev->protocol_features = msg->payload.u64;
 
-	if (vu_has_protocol_feature(vdev,
-				    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
-	    (!vu_has_protocol_feature(vdev, VHOST_USER_PROTOCOL_F_BACKEND_REQ) ||
-	     !vu_has_protocol_feature(vdev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
-		/*
-		 * The use case for using messages for kick/call is simulation, to make
-		 * the kick and call synchronous. To actually get that behaviour, both
-		 * of the other features are required.
-		 * Theoretically, one could use only kick messages, or do them without
-		 * having F_REPLY_ACK, but too many (possibly pending) messages on the
-		 * socket will eventually cause the master to hang, to avoid this in
-		 * scenarios where not desired enforce that the settings are in a way
-		 * that actually enables the simulation case.
-		 */
-		vu_panic(vdev,
-			 "F_IN_BAND_NOTIFICATIONS requires F_BACKEND_REQ && F_REPLY_ACK");
-		return false;
-	}
-
 	return false;
 }
 
-
-static bool vu_get_queue_num_exec(struct VhostUserMsg *msg)
+/**
+ * vu_get_queue_num_exec() - Tell how many queues we support
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: True as a reply is requested
+ */
+static bool vu_get_queue_num_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
 {
+	(void)vdev;
+
 	vmsg_set_reply_u64(msg, VHOST_USER_MAX_QUEUES);
+
 	return true;
 }
 
-static bool vu_set_vring_enable_exec(VuDev *vdev, struct VhostUserMsg *msg)
+/**
+ * vu_set_vring_enable_exec() - Enable or disable corresponding vring
+ * @vdev:	vhost-user device
+ * @vmsg:	vhost-user message
+ *
+ * Return: False as no reply is requested
+ */
+static bool vu_set_vring_enable_exec(struct vu_dev *vdev,
+				     struct vhost_user_msg *msg)
 {
-	unsigned int index = msg->payload.state.index;
 	unsigned int enable = msg->payload.state.num;
+	unsigned int idx = msg->payload.state.index;
 
-	debug("State.index: %u", index);
+	debug("State.index:  %u", idx);
 	debug("State.enable: %u", enable);
 
-	if (index >= VHOST_USER_MAX_QUEUES) {
-		vu_panic(vdev, "Invalid vring_enable index: %u", index);
-		return false;
-	}
+	if (idx >= VHOST_USER_MAX_QUEUES)
+		die("Invalid vring_enable index: %u", idx);
 
-	vdev->vq[index].enable = enable;
+	vdev->vq[idx].enable = enable;
+
 	return false;
 }
 
+/**
+ * vu_init() - Initialize vhost-user device structure
+ * @c:		execution context
+ * @vdev:	vhost-user device
+ */
 void vu_init(struct ctx *c)
 {
 	int i;
 
-	c->vdev.hdrlen = 0;
-	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++)
-		c->vdev.vq[i] = (VuVirtq){
+	c->vdev = &vdev_storage;
+	c->vdev->context = c;
+	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
+		c->vdev->vq[i] = (struct vu_virtq){
 			.call_fd = -1,
 			.kick_fd = -1,
 			.err_fd = -1,
 			.notification = true,
 		};
+	}
 }
 
-static void vu_cleanup(VuDev *vdev)
+/**
+ * vu_cleanup() - Reset vhost-user device
+ * @vdev:	vhost-user device
+ */
+void vu_cleanup(struct vu_dev *vdev)
 {
 	unsigned int i;
 
 	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
-		VuVirtq *vq = &vdev->vq[i];
+		struct vu_virtq *vq = &vdev->vq[i];
 
 		vq->started = false;
 		vq->notification = true;
@@ -946,7 +882,7 @@ static void vu_cleanup(VuDev *vdev)
 			vq->err_fd = -1;
 		}
 		if (vq->kick_fd != -1) {
-			vu_remove_watch(vdev,  vq->kick_fd);
+			vu_remove_watch(vdev, vq->kick_fd);
 			close(vq->kick_fd);
 			vq->kick_fd = -1;
 		}
@@ -955,40 +891,66 @@ static void vu_cleanup(VuDev *vdev)
 		vq->vring.used = 0;
 		vq->vring.avail = 0;
 	}
-	vdev->hdrlen = 0;
 
 	for (i = 0; i < vdev->nregions; i++) {
-		VuDevRegion *r = &vdev->regions[i];
-		void *m = (void *) (uintptr_t) r->mmap_addr;
+		const struct vu_dev_region *r = &vdev->regions[i];
 
-		if (m)
-			munmap(m, r->size + r->mmap_offset);
+		if (r->mmap_addr) {
+			/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+			munmap((void *)r->mmap_addr, r->size + r->mmap_offset);
+		}
 	}
 	vdev->nregions = 0;
 }
 
 /**
- * tap_handler_vu() - Packet handler for vhost-user
- * @c:		Execution context
+ * vu_sock_reset() - Reset connection socket
+ * @vdev:	vhost-user device
+ */
+static void vu_sock_reset(struct vu_dev *vdev)
+{
+	tap_sock_reset(vdev->context);
+}
+
+static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
+					 struct vhost_user_msg *msg) = {
+	[VHOST_USER_GET_FEATURES]	   = vu_get_features_exec,
+	[VHOST_USER_SET_FEATURES]	   = vu_set_features_exec,
+	[VHOST_USER_GET_PROTOCOL_FEATURES] = vu_get_protocol_features_exec,
+	[VHOST_USER_SET_PROTOCOL_FEATURES] = vu_set_protocol_features_exec,
+	[VHOST_USER_GET_QUEUE_NUM]	   = vu_get_queue_num_exec,
+	[VHOST_USER_SET_OWNER]		   = vu_set_owner_exec,
+	[VHOST_USER_SET_MEM_TABLE]	   = vu_set_mem_table_exec,
+	[VHOST_USER_SET_VRING_NUM]	   = vu_set_vring_num_exec,
+	[VHOST_USER_SET_VRING_ADDR]	   = vu_set_vring_addr_exec,
+	[VHOST_USER_SET_VRING_BASE]	   = vu_set_vring_base_exec,
+	[VHOST_USER_GET_VRING_BASE]	   = vu_get_vring_base_exec,
+	[VHOST_USER_SET_VRING_KICK]	   = vu_set_vring_kick_exec,
+	[VHOST_USER_SET_VRING_CALL]	   = vu_set_vring_call_exec,
+	[VHOST_USER_SET_VRING_ERR]	   = vu_set_vring_err_exec,
+	[VHOST_USER_SET_VRING_ENABLE]	   = vu_set_vring_enable_exec,
+};
+
+/**
+ * vu_control_handler() - Handle control commands for vhost-user
+ * @vdev:	vhost-user device
+ * @fd:		vhost-user message socket
  * @events:	epoll events
  */
-void tap_handler_vu(struct ctx *c, uint32_t events)
+void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events)
 {
-	VuDev *dev = &c->vdev;
-	struct VhostUserMsg msg = { 0 };
+	struct vhost_user_msg msg = { 0 };
 	bool need_reply, reply_requested;
 	int ret;
 
 	if (events & (EPOLLRDHUP | EPOLLHUP | EPOLLERR)) {
-		tap_sock_reset(c);
+		vu_sock_reset(vdev);
 		return;
 	}
 
-	ret = vu_message_read_default(dev, c->fd_tap, &msg);
-	if (ret <= 0) {
-		if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK)
-			tap_sock_reset(c);
+	ret = vu_message_read_default(fd, &msg);
+	if (ret == 0) {
+		vu_sock_reset(vdev);
 		return;
 	}
 	debug("================ Vhost user message ================");
@@ -998,60 +960,14 @@ void tap_handler_vu(struct ctx *c, uint32_t events)
 	debug("Size: %u", msg.hdr.size);
 
 	need_reply = msg.hdr.flags & VHOST_USER_NEED_REPLY_MASK;
-	switch (msg.hdr.request) {
-	case VHOST_USER_GET_FEATURES:
-		reply_requested = vu_get_features_exec(&msg);
-		break;
-	case VHOST_USER_SET_FEATURES:
-		reply_requested = vu_set_features_exec(dev, &msg);
-		break;
-	case VHOST_USER_GET_PROTOCOL_FEATURES:
-		reply_requested = vu_get_protocol_features_exec(&msg);
-		break;
-	case VHOST_USER_SET_PROTOCOL_FEATURES:
-		reply_requested = vu_set_protocol_features_exec(dev, &msg);
-		break;
-	case VHOST_USER_GET_QUEUE_NUM:
-		reply_requested = vu_get_queue_num_exec(&msg);
-		break;
-	case VHOST_USER_SET_OWNER:
-		reply_requested = vu_set_owner_exec();
-		break;
-	case VHOST_USER_SET_MEM_TABLE:
-		reply_requested = vu_set_mem_table_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_NUM:
-		reply_requested = vu_set_vring_num_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_ADDR:
-		reply_requested = vu_set_vring_addr_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_BASE:
-		reply_requested = vu_set_vring_base_exec(dev, &msg);
-		break;
-	case VHOST_USER_GET_VRING_BASE:
-		reply_requested = vu_get_vring_base_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_KICK:
-		reply_requested = vu_set_vring_kick_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_CALL:
-		reply_requested = vu_set_vring_call_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_ERR:
-		reply_requested = vu_set_vring_err_exec(dev, &msg);
-		break;
-	case VHOST_USER_SET_VRING_ENABLE:
-		reply_requested = vu_set_vring_enable_exec(dev, &msg);
-		break;
-	case VHOST_USER_NONE:
-		vu_cleanup(dev);
-		return;
-	default:
-		vu_panic(dev, "Unhandled request: %d", msg.hdr.request);
-		return;
-	}
+	if (msg.hdr.request >= 0 && msg.hdr.request < VHOST_USER_MAX &&
+	    vu_handle[msg.hdr.request])
+		reply_requested = vu_handle[msg.hdr.request](vdev, &msg);
+	else
+		die("Unhandled request: %d", msg.hdr.request);
 
+	/* cppcheck-suppress legacyUninitvar */
 	if (!reply_requested && need_reply) {
 		msg.payload.u64 = 0;
 		msg.hdr.flags = 0;
@@ -1061,6 +977,5 @@ void tap_handler_vu(struct ctx *c, uint32_t events)
 	}
 
 	if (reply_requested)
-		ret = vu_send_reply(dev, c->fd_tap, &msg);
-	free(msg.data);
+		vu_send_reply(fd, &msg);
 }
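The switch statement is replaced by the vu_handle[] dispatch table, so adding a request only means adding a table entry. How the handler might be driven from the main epoll loop, as a hedged sketch: struct ctx, union epoll_ref and EPOLL_TYPE_VHOST_KICK come from the patch, while EPOLL_TYPE_VHOST_CMD, loop_once() and the loop structure itself are assumptions for illustration, not passt's actual dispatcher:

#include <sys/epoll.h>

void loop_once(struct ctx *c, int epollfd)
{
	struct epoll_event events[8];
	int i, n = epoll_wait(epollfd, events, 8, -1);

	for (i = 0; i < n; i++) {
		union epoll_ref ref = { .u64 = events[i].data.u64 };

		switch (ref.type) {
		case EPOLL_TYPE_VHOST_CMD:	/* assumed enumerator */
			vu_control_handler(c->vdev, ref.fd,
					   events[i].events);
			break;
		case EPOLL_TYPE_VHOST_KICK:	/* kick on a TX queue */
			/* drain the eventfd, then process the vring */
			break;
		default:
			break;
		}
	}
}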