diff options
Diffstat (limited to 'udp_vu.c')
-rw-r--r-- | udp_vu.c | 436 |
1 files changed, 277 insertions, 159 deletions
@@ -1,6 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-or-later +/* udp_vu.c - UDP L2 vhost-user management functions + * + * Copyright Red Hat + * Author: Laurent Vivier <lvivier@redhat.com> + */ #include <unistd.h> +#include <assert.h> #include <net/ethernet.h> #include <net/if.h> #include <netinet/in.h> @@ -14,205 +20,317 @@ #include "checksum.h" #include "util.h" #include "ip.h" +#include "siphash.h" +#include "inany.h" #include "passt.h" #include "pcap.h" #include "log.h" #include "vhost_user.h" #include "udp_internal.h" +#include "flow.h" +#include "flow_table.h" +#include "udp_flow.h" #include "udp_vu.h" +#include "vu_common.h" -/* vhost-user */ -static const struct virtio_net_hdr vu_header = { - .flags = VIRTIO_NET_HDR_F_DATA_VALID, - .gso_type = VIRTIO_NET_HDR_GSO_NONE, -}; - -static unsigned char buffer[65536]; static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE]; -static unsigned int indexes [VIRTQUEUE_MAX_SIZE]; +static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE]; + +/** + * udp_vu_hdrlen() - return the size of the header in level 2 frame (UDP) + * @v6: Set for IPv6 packet + * + * Return: Return the size of the header + */ +static size_t udp_vu_hdrlen(bool v6) +{ + size_t hdrlen; + + hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) + + sizeof(struct ethhdr) + sizeof(struct udphdr); -void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, - const struct timespec *now) + if (v6) + hdrlen += sizeof(struct ipv6hdr); + else + hdrlen += sizeof(struct iphdr); + + return hdrlen; +} + +static int udp_vu_sock_init(int s, union sockaddr_inany *s_in) { - VuDev *vdev = (VuDev *)&c->vdev; - VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; - size_t l2_hdrlen, vnet_hdrlen, fillsize; - ssize_t data_len; - in_port_t dstport = ref.udp.port; - bool has_mrg_rxbuf, v6 = ref.udp.v6; - struct msghdr msg; - int i, iov_count, iov_used, virtqueue_max; - - if (c->no_udp || !(events & EPOLLIN)) - return; + struct msghdr msg = { + .msg_name = s_in, + 
.msg_namelen = sizeof(union sockaddr_inany), + }; - has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF); - if (has_mrg_rxbuf) { - vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); - virtqueue_max = VIRTQUEUE_MAX_SIZE; - } else { - vnet_hdrlen = sizeof(struct virtio_net_hdr); - virtqueue_max = 1; - } - l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct udphdr); + return recvmsg(s, &msg, MSG_PEEK | MSG_DONTWAIT); +} + +/** + * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers + * @c: Execution context + * @s: Socket to receive from + * @events: epoll events bitmap + * @v6: Set for IPv6 connections + * @dlen: Size of received data (output) + * + * Return: Number of iov entries used to store the datagram + */ +static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events, + bool v6, ssize_t *dlen) +{ + struct vu_dev *vdev = c->vdev; + struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; + int iov_cnt, idx, iov_used; + struct msghdr msg = { 0 }; + size_t off, hdrlen; - if (v6) { - l2_hdrlen += sizeof(struct ipv6hdr); + ASSERT(!c->no_udp); - udp6_localname.sin6_port = htons(dstport); - msg.msg_name = &udp6_localname; - msg.msg_namelen = sizeof(udp6_localname); - } else { - l2_hdrlen += sizeof(struct iphdr); + if (!(events & EPOLLIN)) + return 0; + + /* compute L2 header length */ + hdrlen = udp_vu_hdrlen(v6); + + vu_init_elem(elem, iov_vu, VIRTQUEUE_MAX_SIZE); + + iov_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE, + IP_MAX_MTU - sizeof(struct udphdr) + hdrlen, + NULL); + if (iov_cnt == 0) + return 0; + + /* reserve space for the headers */ + iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen; + iov_vu[0].iov_len -= hdrlen; - udp4_localname.sin_port = htons(dstport); - msg.msg_name = &udp4_localname; - msg.msg_namelen = sizeof(udp4_localname); + /* read data from the socket */ + msg.msg_iov = iov_vu; + msg.msg_iovlen = iov_cnt; + + *dlen = recvmsg(s, &msg, 0); + if (*dlen < 0) { + 
vu_queue_rewind(vq, iov_cnt); + return 0; } - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; + /* restore the pointer to the headers address */ + iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen; + iov_vu[0].iov_len += hdrlen; - for (i = 0; i < UDP_MAX_FRAMES; i++) { - struct virtio_net_hdr_mrg_rxbuf *vh; - struct ethhdr *eh; - char *base; - size_t size; - - fillsize = USHRT_MAX; - iov_count = 0; - while (fillsize && iov_count < virtqueue_max) { - VuVirtqElement *elem; - - elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer); - if (!elem) - break; - - if (elem->in_num < 1) { - err("virtio-net receive queue contains no in buffers"); - vu_queue_rewind(vdev, vq, iov_count); - return; - } - ASSERT(elem->in_num == 1); - ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen); + /* count the numbers of buffer filled by recvmsg() */ + idx = iov_skip_bytes(iov_vu, iov_cnt, *dlen + hdrlen, &off); - indexes[iov_count] = elem->index; - if (iov_count == 0) { - iov_vu[0].iov_base = (char *)elem->in_sg[0].iov_base + l2_hdrlen; - iov_vu[0].iov_len = elem->in_sg[0].iov_len - l2_hdrlen; - } else { - iov_vu[iov_count].iov_base = elem->in_sg[0].iov_base; - iov_vu[iov_count].iov_len = elem->in_sg[0].iov_len; - } + /* adjust last iov length */ + if (idx < iov_cnt) + iov_vu[idx].iov_len = off; + iov_used = idx + !!off; - if (iov_vu[iov_count].iov_len > fillsize) - iov_vu[iov_count].iov_len = fillsize; + vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used); - fillsize -= iov_vu[iov_count].iov_len; + /* release unused buffers */ + vu_queue_rewind(vq, iov_cnt - iov_used); - iov_count++; - } - if (iov_count == 0) - break; + return iov_used; +} - msg.msg_iov = iov_vu; - msg.msg_iovlen = iov_count; +/** + * udp_vu_prepare() - Prepare the packet header + * @c: Execution context + * @toside: Address information for one side of the flow + * @dlen: Packet data length + * + * Return: Layer-4 length + */ +static size_t udp_vu_prepare(const struct ctx *c, + const struct 
flowside *toside, ssize_t dlen) +{ + struct ethhdr *eh; + size_t l4len; - data_len = recvmsg(ref.fd, &msg, 0); - if (data_len < 0) { - vu_queue_rewind(vdev, vq, iov_count); - return; - } + /* ethernet header */ + eh = vu_eth(iov_vu[0].iov_base); - iov_used = 0; - size = data_len; - while (size) { - if (iov_vu[iov_used].iov_len > size) - iov_vu[iov_used].iov_len = size; + memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest)); + memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source)); - size -= iov_vu[iov_used].iov_len; - iov_used++; - } + /* initialize header */ + if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) { + struct iphdr *iph = vu_ip(iov_vu[0].iov_base); + struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base); - base = (char *)iov_vu[0].iov_base - l2_hdrlen; - size = iov_vu[0].iov_len + l2_hdrlen; + eh->h_proto = htons(ETH_P_IP); - /* release unused buffers */ - vu_queue_rewind(vdev, vq, iov_count - iov_used); + *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP); - /* vnet_header */ - vh = (struct virtio_net_hdr_mrg_rxbuf *)base; - vh->hdr = vu_header; - if (has_mrg_rxbuf) - vh->num_buffers = htole16(iov_used); + l4len = udp_update_hdr4(iph, bp, toside, dlen, true); + } else { + struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base); + struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base); - /* ethernet header */ - eh = (struct ethhdr *)(base + vnet_hdrlen); + eh->h_proto = htons(ETH_P_IPV6); - memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest)); - memcpy(eh->h_source, c->mac, sizeof(eh->h_source)); + *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP); - /* initialize header */ - if (v6) { - struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1); - struct udphdr *uh = (struct udphdr *)(ip6h + 1); - uint32_t sum; + l4len = udp_update_hdr6(ip6h, bp, toside, dlen, true); + } - eh->h_proto = htons(ETH_P_IPV6); + return l4len; +} - *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP); +/** + * udp_vu_csum() - Calculate and set checksum for a UDP 
packet + * @toside: Address information for one side of the flow + * @l4len: IPv4 Payload length + * @iov_used: Length of the array + */ +static void udp_vu_csum(const struct flowside *toside, int iov_used) +{ + const struct in_addr *src4 = inany_v4(&toside->oaddr); + const struct in_addr *dst4 = inany_v4(&toside->eaddr); + char *base = iov_vu[0].iov_base; + struct udp_payload_t *bp; + + if (src4 && dst4) { + bp = vu_payloadv4(base); + csum_udp4(&bp->uh, *src4, *dst4, iov_vu, iov_used, + (char *)&bp->data - base); + } else { + bp = vu_payloadv6(base); + csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, + iov_vu, iov_used, (char *)&bp->data - base); + } +} - udp_update_hdr6(c, ip6h, data_len, &udp6_localname, - dstport, now); - if (*c->pcap) { - sum = proto_ipv6_header_psum(ip6h->payload_len, - IPPROTO_UDP, - &ip6h->saddr, - &ip6h->daddr); +/** + * udp_vu_listen_sock_handler() - Handle new data from socket + * @c: Execution context + * @ref: epoll reference + * @events: epoll events bitmap + * @now: Current timestamp + */ +void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref, + uint32_t events, const struct timespec *now) +{ + struct vu_dev *vdev = c->vdev; + struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; + int i; - iov_vu[0].iov_base = uh; - iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh); - uh->check = csum_iov(iov_vu, iov_used, sum); - } - } else { - struct iphdr *iph = (struct iphdr *)(eh + 1); - struct udphdr *uh = (struct udphdr *)(iph + 1); - uint32_t sum; - - eh->h_proto = htons(ETH_P_IP); - - *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP); - - udp_update_hdr4(c, iph, data_len, &udp4_localname, - dstport, now); - if (*c->pcap) { - sum = proto_ipv4_header_psum(iph->tot_len, - IPPROTO_UDP, - (struct in_addr){ .s_addr = iph->saddr }, - (struct in_addr){ .s_addr = iph->daddr }); - - iov_vu[0].iov_base = uh; - iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh); - uh->check = csum_iov(iov_vu, iov_used, sum); + if 
(udp_sock_errs(c, ref.fd, events) < 0) { + err("UDP: Unrecoverable error on listening socket:" + " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); + return; + } + + for (i = 0; i < UDP_MAX_FRAMES; i++) { + const struct flowside *toside; + union sockaddr_inany s_in; + flow_sidx_t sidx; + uint8_t pif; + ssize_t dlen; + int iov_used; + bool v6; + + if (udp_vu_sock_init(ref.fd, &s_in) < 0) + break; + + sidx = udp_flow_from_sock(c, ref, &s_in, now); + pif = pif_at_sidx(sidx); + + if (pif != PIF_TAP) { + if (flow_sidx_valid(sidx)) { + flow_sidx_t fromsidx = flow_sidx_opposite(sidx); + struct udp_flow *uflow = udp_at_sidx(sidx); + + flow_err(uflow, + "No support for forwarding UDP from %s to %s", + pif_name(pif_at_sidx(fromsidx)), + pif_name(pif)); + } else { + debug("Discarding 1 datagram without flow"); } + + continue; + } + + toside = flowside_at_sidx(sidx); + + v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); + + iov_used = udp_vu_sock_recv(c, ref.fd, events, v6, &dlen); + if (iov_used <= 0) + break; + + udp_vu_prepare(c, toside, dlen); + if (*c->pcap) { + udp_vu_csum(toside, iov_used); + pcap_iov(iov_vu, iov_used, + sizeof(struct virtio_net_hdr_mrg_rxbuf)); } + vu_flush(vdev, vq, elem, iov_used); + } +} + +/** + * udp_vu_reply_sock_handler() - Handle new data from flow specific socket + * @c: Execution context + * @ref: epoll reference + * @events: epoll events bitmap + * @now: Current timestamp + */ +void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref, + uint32_t events, const struct timespec *now) +{ + flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside); + const struct flowside *toside = flowside_at_sidx(tosidx); + struct udp_flow *uflow = udp_at_sidx(ref.flowside); + int from_s = uflow->s[ref.flowside.sidei]; + struct vu_dev *vdev = c->vdev; + struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; + int i; + + ASSERT(!c->no_udp); + + if (udp_sock_errs(c, from_s, events) < 0) { + flow_err(uflow, "Unrecoverable error on reply 
socket"); + flow_err_details(uflow); + udp_flow_close(c, uflow); + return; + } + + for (i = 0; i < UDP_MAX_FRAMES; i++) { + uint8_t topif = pif_at_sidx(tosidx); + ssize_t dlen; + int iov_used; + bool v6; - /* set iov for pcap logging */ - iov_vu[0].iov_base = base + vnet_hdrlen; - iov_vu[0].iov_len = size - vnet_hdrlen; - pcap_iov(iov_vu, iov_used); + ASSERT(uflow); - /* set iov_len for vu_queue_fill_by_index(); */ - iov_vu[0].iov_base = base; - iov_vu[0].iov_len = size; + if (topif != PIF_TAP) { + uint8_t frompif = pif_at_sidx(ref.flowside); - /* send packets */ - for (i = 0; i < iov_used; i++) - vu_queue_fill_by_index(vdev, vq, indexes[i], - iov_vu[i].iov_len, i); + flow_err(uflow, + "No support for forwarding UDP from %s to %s", + pif_name(frompif), pif_name(topif)); + continue; + } + + v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); - vu_queue_flush(vdev, vq, iov_used); - vu_queue_notify(vdev, vq); + iov_used = udp_vu_sock_recv(c, from_s, events, v6, &dlen); + if (iov_used <= 0) + break; + flow_trace(uflow, "Received 1 datagram on reply socket"); + uflow->ts = now->tv_sec; + + udp_vu_prepare(c, toside, dlen); + if (*c->pcap) { + udp_vu_csum(toside, iov_used); + pcap_iov(iov_vu, iov_used, + sizeof(struct virtio_net_hdr_mrg_rxbuf)); + } + vu_flush(vdev, vq, elem, iov_used); } } |