Diffstat (limited to 'udp_vu.c')
-rw-r--r--  udp_vu.c  436
1 file changed, 277 insertions, 159 deletions
diff --git a/udp_vu.c b/udp_vu.c
index c2b2bfe..863b6da 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -1,6 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+/* udp_vu.c - UDP L2 vhost-user management functions
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ */
#include <unistd.h>
+#include <assert.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/in.h>
@@ -14,205 +20,317 @@
#include "checksum.h"
#include "util.h"
#include "ip.h"
+#include "siphash.h"
+#include "inany.h"
#include "passt.h"
#include "pcap.h"
#include "log.h"
#include "vhost_user.h"
#include "udp_internal.h"
+#include "flow.h"
+#include "flow_table.h"
+#include "udp_flow.h"
#include "udp_vu.h"
+#include "vu_common.h"
-/* vhost-user */
-static const struct virtio_net_hdr vu_header = {
- .flags = VIRTIO_NET_HDR_F_DATA_VALID,
- .gso_type = VIRTIO_NET_HDR_GSO_NONE,
-};
-
-static unsigned char buffer[65536];
static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
-static unsigned int indexes [VIRTQUEUE_MAX_SIZE];
+static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE];
+
+/**
+ * udp_vu_hdrlen() - Return the size of the headers in a UDP level-2 frame
+ * @v6: Set for IPv6 packet
+ *
+ * Return: Size of the headers
+ */
+static size_t udp_vu_hdrlen(bool v6)
+{
+ size_t hdrlen;
+
+ hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
+ sizeof(struct ethhdr) + sizeof(struct udphdr);
-void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
- const struct timespec *now)
+ if (v6)
+ hdrlen += sizeof(struct ipv6hdr);
+ else
+ hdrlen += sizeof(struct iphdr);
+
+ return hdrlen;
+}
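
As a point of reference, with the usual Linux struct layouts this helper yields 54 bytes for IPv4 and 74 bytes for IPv6 frames (12-byte mergeable vnet header, 14-byte Ethernet header, 20- or 40-byte IP header, 8-byte UDP header). A minimal standalone check, assuming those UAPI definitions and not part of the change itself:

/* Standalone sketch (not from this diff): expected values of the
 * header-length computation above, assuming the usual Linux UAPI
 * struct sizes. */
#include <assert.h>
#include <stddef.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <linux/ipv6.h>
#include <linux/virtio_net.h>

int main(void)
{
	size_t base = sizeof(struct virtio_net_hdr_mrg_rxbuf) + /* 12 */
		      sizeof(struct ethhdr) +                    /* 14 */
		      sizeof(struct udphdr);                     /*  8 */

	assert(base + sizeof(struct iphdr)   == 54); /* IPv4 L2 headers */
	assert(base + sizeof(struct ipv6hdr) == 74); /* IPv6 L2 headers */
	return 0;
}
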
+
+static int udp_vu_sock_init(int s, union sockaddr_inany *s_in)
{
- VuDev *vdev = (VuDev *)&c->vdev;
- VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
- size_t l2_hdrlen, vnet_hdrlen, fillsize;
- ssize_t data_len;
- in_port_t dstport = ref.udp.port;
- bool has_mrg_rxbuf, v6 = ref.udp.v6;
- struct msghdr msg;
- int i, iov_count, iov_used, virtqueue_max;
-
- if (c->no_udp || !(events & EPOLLIN))
- return;
+ struct msghdr msg = {
+ .msg_name = s_in,
+ .msg_namelen = sizeof(union sockaddr_inany),
+ };
- has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF);
- if (has_mrg_rxbuf) {
- vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- virtqueue_max = VIRTQUEUE_MAX_SIZE;
- } else {
- vnet_hdrlen = sizeof(struct virtio_net_hdr);
- virtqueue_max = 1;
- }
- l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct udphdr);
+ return recvmsg(s, &msg, MSG_PEEK | MSG_DONTWAIT);
+}
+
+/**
+ * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
+ * @c: Execution context
+ * @s: Socket to receive from
+ * @events: epoll events bitmap
+ * @v6: Set for IPv6 connections
+ * @dlen: Size of received data (output)
+ *
+ * Return: Number of iov entries used to store the datagram
+ */
+static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events,
+ bool v6, ssize_t *dlen)
+{
+ struct vu_dev *vdev = c->vdev;
+ struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ int iov_cnt, idx, iov_used;
+ struct msghdr msg = { 0 };
+ size_t off, hdrlen;
- if (v6) {
- l2_hdrlen += sizeof(struct ipv6hdr);
+ ASSERT(!c->no_udp);
- udp6_localname.sin6_port = htons(dstport);
- msg.msg_name = &udp6_localname;
- msg.msg_namelen = sizeof(udp6_localname);
- } else {
- l2_hdrlen += sizeof(struct iphdr);
+ if (!(events & EPOLLIN))
+ return 0;
+
+ /* compute L2 header length */
+ hdrlen = udp_vu_hdrlen(v6);
+
+ vu_init_elem(elem, iov_vu, VIRTQUEUE_MAX_SIZE);
+
+ iov_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE,
+ IP_MAX_MTU - sizeof(struct udphdr) + hdrlen,
+ NULL);
+ if (iov_cnt == 0)
+ return 0;
+
+ /* reserve space for the headers */
+ iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
+ iov_vu[0].iov_len -= hdrlen;
- udp4_localname.sin_port = htons(dstport);
- msg.msg_name = &udp4_localname;
- msg.msg_namelen = sizeof(udp4_localname);
+ /* read data from the socket */
+ msg.msg_iov = iov_vu;
+ msg.msg_iovlen = iov_cnt;
+
+ *dlen = recvmsg(s, &msg, 0);
+ if (*dlen < 0) {
+ vu_queue_rewind(vq, iov_cnt);
+ return 0;
}
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
+ /* restore the pointer to the start of the headers */
+ iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
+ iov_vu[0].iov_len += hdrlen;
- for (i = 0; i < UDP_MAX_FRAMES; i++) {
- struct virtio_net_hdr_mrg_rxbuf *vh;
- struct ethhdr *eh;
- char *base;
- size_t size;
-
- fillsize = USHRT_MAX;
- iov_count = 0;
- while (fillsize && iov_count < virtqueue_max) {
- VuVirtqElement *elem;
-
- elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
- if (!elem)
- break;
-
- if (elem->in_num < 1) {
- err("virtio-net receive queue contains no in buffers");
- vu_queue_rewind(vdev, vq, iov_count);
- return;
- }
- ASSERT(elem->in_num == 1);
- ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen);
+ /* count the number of buffers filled by recvmsg() */
+ idx = iov_skip_bytes(iov_vu, iov_cnt, *dlen + hdrlen, &off);
- indexes[iov_count] = elem->index;
- if (iov_count == 0) {
- iov_vu[0].iov_base = (char *)elem->in_sg[0].iov_base + l2_hdrlen;
- iov_vu[0].iov_len = elem->in_sg[0].iov_len - l2_hdrlen;
- } else {
- iov_vu[iov_count].iov_base = elem->in_sg[0].iov_base;
- iov_vu[iov_count].iov_len = elem->in_sg[0].iov_len;
- }
+ /* adjust last iov length */
+ if (idx < iov_cnt)
+ iov_vu[idx].iov_len = off;
+ iov_used = idx + !!off;
- if (iov_vu[iov_count].iov_len > fillsize)
- iov_vu[iov_count].iov_len = fillsize;
+ vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used);
- fillsize -= iov_vu[iov_count].iov_len;
+ /* release unused buffers */
+ vu_queue_rewind(vq, iov_cnt - iov_used);
- iov_count++;
- }
- if (iov_count == 0)
- break;
+ return iov_used;
+}
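
The accounting after recvmsg() relies on iov_skip_bytes() returning the index of the first vector that was not entirely filled, plus the offset reached within it, so iov_used becomes idx + !!off and the last vector is truncated. A self-contained sketch of the same arithmetic, under a hypothetical helper name rather than passt's:

/* Sketch of the post-recvmsg() buffer accounting: given `len` bytes
 * written across `cnt` vectors, truncate the last partially filled
 * vector and return how many vectors were used. Hypothetical
 * stand-alone equivalent of the iov_skip_bytes() + "idx + !!off"
 * logic above, not passt's helper. */
#include <stddef.h>
#include <sys/uio.h>

int iov_trim_used(struct iovec *iov, int cnt, size_t len)
{
	int i;

	for (i = 0; i < cnt && len >= iov[i].iov_len; i++)
		len -= iov[i].iov_len;		/* vector i completely filled */

	if (i < cnt && len) {
		iov[i].iov_len = len;		/* last vector partially filled */
		return i + 1;
	}

	return i;				/* datagram ended on a vector boundary */
}
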
- msg.msg_iov = iov_vu;
- msg.msg_iovlen = iov_count;
+/**
+ * udp_vu_prepare() - Prepare the packet header
+ * @c: Execution context
+ * @toside: Address information for one side of the flow
+ * @dlen: Packet data length
+ *
+ * Return: Layer-4 length
+ */
+static size_t udp_vu_prepare(const struct ctx *c,
+ const struct flowside *toside, ssize_t dlen)
+{
+ struct ethhdr *eh;
+ size_t l4len;
- data_len = recvmsg(ref.fd, &msg, 0);
- if (data_len < 0) {
- vu_queue_rewind(vdev, vq, iov_count);
- return;
- }
+ /* ethernet header */
+ eh = vu_eth(iov_vu[0].iov_base);
- iov_used = 0;
- size = data_len;
- while (size) {
- if (iov_vu[iov_used].iov_len > size)
- iov_vu[iov_used].iov_len = size;
+ memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
+ memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
- size -= iov_vu[iov_used].iov_len;
- iov_used++;
- }
+ /* initialize header */
+ if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
+ struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
- base = (char *)iov_vu[0].iov_base - l2_hdrlen;
- size = iov_vu[0].iov_len + l2_hdrlen;
+ eh->h_proto = htons(ETH_P_IP);
- /* release unused buffers */
- vu_queue_rewind(vdev, vq, iov_count - iov_used);
+ *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
- /* vnet_header */
- vh = (struct virtio_net_hdr_mrg_rxbuf *)base;
- vh->hdr = vu_header;
- if (has_mrg_rxbuf)
- vh->num_buffers = htole16(iov_used);
+ l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
+ } else {
+ struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
- /* ethernet header */
- eh = (struct ethhdr *)(base + vnet_hdrlen);
+ eh->h_proto = htons(ETH_P_IPV6);
- memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
- memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
+ *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
- /* initialize header */
- if (v6) {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
- struct udphdr *uh = (struct udphdr *)(ip6h + 1);
- uint32_t sum;
+ l4len = udp_update_hdr6(ip6h, bp, toside, dlen, true);
+ }
- eh->h_proto = htons(ETH_P_IPV6);
+ return l4len;
+}
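
vu_eth(), vu_ip() and vu_payloadv4()/vu_payloadv6() come from vu_common.h and simply resolve fixed offsets past the vnet header. A hypothetical sketch of such accessors, assuming the mergeable-rxbuf vnet header always leads the frame:

/* Hypothetical accessors in the spirit of the vu_*() helpers used
 * above: a frame starts with the (mergeable) vnet header, followed by
 * the Ethernet header, then the IP header. */
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <linux/virtio_net.h>

static inline struct ethhdr *frame_eth(void *base)
{
	return (struct ethhdr *)((char *)base +
				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
}

static inline struct iphdr *frame_ip4(void *base)
{
	return (struct iphdr *)(frame_eth(base) + 1);	/* IPv4 case only */
}
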
- *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
+/**
+ * udp_vu_csum() - Calculate and set checksum for a UDP packet
+ * @toside: Address information for one side of the flow
+ * @iov_used: Number of iov_vu entries used for the frame,
+ *            including the headers in iov_vu[0]
+ */
+static void udp_vu_csum(const struct flowside *toside, int iov_used)
+{
+ const struct in_addr *src4 = inany_v4(&toside->oaddr);
+ const struct in_addr *dst4 = inany_v4(&toside->eaddr);
+ char *base = iov_vu[0].iov_base;
+ struct udp_payload_t *bp;
+
+ if (src4 && dst4) {
+ bp = vu_payloadv4(base);
+ csum_udp4(&bp->uh, *src4, *dst4, iov_vu, iov_used,
+ (char *)&bp->data - base);
+ } else {
+ bp = vu_payloadv6(base);
+ csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6,
+ iov_vu, iov_used, (char *)&bp->data - base);
+ }
+}
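
The checksum is only computed when the frame is also written to a capture file; on the guest path the vnet header can mark the data as already valid (VIRTIO_NET_HDR_F_DATA_VALID, as the removed vu_header above did). A generic ones'-complement sum over an iovec array, independent of passt's csum_udp4()/csum_udp6() and with the pseudo-header and the UDP zero-checksum rule left to the caller, might look like:

/* Sketch of a ones'-complement sum over scattered buffers. `init`
 * carries the pseudo-header sum; the caller stores htons() of the
 * result in the UDP checksum field. Not passt's implementation. */
#include <stdint.h>
#include <stddef.h>
#include <sys/uio.h>

uint16_t csum_fold_iov(const struct iovec *iov, int cnt, uint32_t init)
{
	uint32_t sum = init;
	uint8_t odd = 0;
	int have_odd = 0;
	int i;

	for (i = 0; i < cnt; i++) {
		const uint8_t *p = iov[i].iov_base;
		size_t len = iov[i].iov_len;

		if (have_odd && len) {	/* pair leftover byte across buffers */
			sum += (odd << 8) | *p++;
			len--;
			have_odd = 0;
		}
		while (len > 1) {	/* sum 16-bit big-endian words */
			sum += (p[0] << 8) | p[1];
			p += 2;
			len -= 2;
		}
		if (len) {		/* stash trailing odd byte */
			odd = p[0];
			have_odd = 1;
		}
	}
	if (have_odd)
		sum += odd << 8;	/* pad final odd byte with zero */

	while (sum >> 16)		/* fold carries */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}
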
- udp_update_hdr6(c, ip6h, data_len, &udp6_localname,
- dstport, now);
- if (*c->pcap) {
- sum = proto_ipv6_header_psum(ip6h->payload_len,
- IPPROTO_UDP,
- &ip6h->saddr,
- &ip6h->daddr);
+/**
+ * udp_vu_listen_sock_handler() - Handle new data from socket
+ * @c: Execution context
+ * @ref: epoll reference
+ * @events: epoll events bitmap
+ * @now: Current timestamp
+ */
+void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events, const struct timespec *now)
+{
+ struct vu_dev *vdev = c->vdev;
+ struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ int i;
- iov_vu[0].iov_base = uh;
- iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
- uh->check = csum_iov(iov_vu, iov_used, sum);
- }
- } else {
- struct iphdr *iph = (struct iphdr *)(eh + 1);
- struct udphdr *uh = (struct udphdr *)(iph + 1);
- uint32_t sum;
-
- eh->h_proto = htons(ETH_P_IP);
-
- *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
-
- udp_update_hdr4(c, iph, data_len, &udp4_localname,
- dstport, now);
- if (*c->pcap) {
- sum = proto_ipv4_header_psum(iph->tot_len,
- IPPROTO_UDP,
- (struct in_addr){ .s_addr = iph->saddr },
- (struct in_addr){ .s_addr = iph->daddr });
-
- iov_vu[0].iov_base = uh;
- iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
- uh->check = csum_iov(iov_vu, iov_used, sum);
+ if (udp_sock_errs(c, ref.fd, events) < 0) {
+ err("UDP: Unrecoverable error on listening socket:"
+ " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
+ return;
+ }
+
+ for (i = 0; i < UDP_MAX_FRAMES; i++) {
+ const struct flowside *toside;
+ union sockaddr_inany s_in;
+ flow_sidx_t sidx;
+ uint8_t pif;
+ ssize_t dlen;
+ int iov_used;
+ bool v6;
+
+ if (udp_vu_sock_init(ref.fd, &s_in) < 0)
+ break;
+
+ sidx = udp_flow_from_sock(c, ref, &s_in, now);
+ pif = pif_at_sidx(sidx);
+
+ if (pif != PIF_TAP) {
+ if (flow_sidx_valid(sidx)) {
+ flow_sidx_t fromsidx = flow_sidx_opposite(sidx);
+ struct udp_flow *uflow = udp_at_sidx(sidx);
+
+ flow_err(uflow,
+ "No support for forwarding UDP from %s to %s",
+ pif_name(pif_at_sidx(fromsidx)),
+ pif_name(pif));
+ } else {
+ debug("Discarding 1 datagram without flow");
}
+
+ continue;
+ }
+
+ toside = flowside_at_sidx(sidx);
+
+ v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
+
+ iov_used = udp_vu_sock_recv(c, ref.fd, events, v6, &dlen);
+ if (iov_used <= 0)
+ break;
+
+ udp_vu_prepare(c, toside, dlen);
+ if (*c->pcap) {
+ udp_vu_csum(toside, iov_used);
+ pcap_iov(iov_vu, iov_used,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf));
}
+ vu_flush(vdev, vq, elem, iov_used);
+ }
+}
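
The choice between IPv4 and IPv6 framing is made from the flowside addresses, not from the receiving socket: inany_v4() (inany.h) yields the embedded IPv4 address only for v4-mapped addresses, so a frame is built as IPv6 as soon as either endpoint is not IPv4. A hypothetical stand-in for that test:

/* Hypothetical equivalent of the inany_v4() test used above, assuming
 * addresses are stored as IPv6 with IPv4 kept as v4-mapped. */
#include <stdbool.h>
#include <netinet/in.h>

bool addr_is_v4(const struct in6_addr *a)
{
	return IN6_IS_ADDR_V4MAPPED(a);
}

/* Usage sketch: IPv6 framing unless both endpoints are IPv4
 *	bool v6 = !(addr_is_v4(&eaddr) && addr_is_v4(&oaddr));
 */
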
+
+/**
+ * udp_vu_reply_sock_handler() - Handle new data from flow-specific socket
+ * @c: Execution context
+ * @ref: epoll reference
+ * @events: epoll events bitmap
+ * @now: Current timestamp
+ */
+void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events, const struct timespec *now)
+{
+ flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
+ const struct flowside *toside = flowside_at_sidx(tosidx);
+ struct udp_flow *uflow = udp_at_sidx(ref.flowside);
+ int from_s = uflow->s[ref.flowside.sidei];
+ struct vu_dev *vdev = c->vdev;
+ struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ int i;
+
+ ASSERT(!c->no_udp);
+
+ if (udp_sock_errs(c, from_s, events) < 0) {
+ flow_err(uflow, "Unrecoverable error on reply socket");
+ flow_err_details(uflow);
+ udp_flow_close(c, uflow);
+ return;
+ }
+
+ for (i = 0; i < UDP_MAX_FRAMES; i++) {
+ uint8_t topif = pif_at_sidx(tosidx);
+ ssize_t dlen;
+ int iov_used;
+ bool v6;
- /* set iov for pcap logging */
- iov_vu[0].iov_base = base + vnet_hdrlen;
- iov_vu[0].iov_len = size - vnet_hdrlen;
- pcap_iov(iov_vu, iov_used);
+ ASSERT(uflow);
- /* set iov_len for vu_queue_fill_by_index(); */
- iov_vu[0].iov_base = base;
- iov_vu[0].iov_len = size;
+ if (topif != PIF_TAP) {
+ uint8_t frompif = pif_at_sidx(ref.flowside);
- /* send packets */
- for (i = 0; i < iov_used; i++)
- vu_queue_fill_by_index(vdev, vq, indexes[i],
- iov_vu[i].iov_len, i);
+ flow_err(uflow,
+ "No support for forwarding UDP from %s to %s",
+ pif_name(frompif), pif_name(topif));
+ continue;
+ }
+
+ v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
- vu_queue_flush(vdev, vq, iov_used);
- vu_queue_notify(vdev, vq);
+ iov_used = udp_vu_sock_recv(c, from_s, events, v6, &dlen);
+ if (iov_used <= 0)
+ break;
+ flow_trace(uflow, "Received 1 datagram on reply socket");
+ uflow->ts = now->tv_sec;
+
+ udp_vu_prepare(c, toside, dlen);
+ if (*c->pcap) {
+ udp_vu_csum(toside, iov_used);
+ pcap_iov(iov_vu, iov_used,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf));
+ }
+ vu_flush(vdev, vq, elem, iov_used);
}
}
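
The reply handler picks its destination by flipping the side index carried in the epoll reference: ref.flowside names the socket-facing side of the flow, flow_sidx_opposite() gives the tap-facing side, and the flowside attached to that side holds the addresses used to build the frame. A minimal sketch of such two-sided indexing, with hypothetical types rather than passt's flow table:

/* Hypothetical sketch of two-sided flow indexing: every flow has two
 * sides (0 and 1), and the opposite side is simply the other index. */
#include <stdint.h>

struct side_idx {
	uint32_t flowi;		/* index of the flow in a flow table */
	uint8_t sidei;		/* which side of that flow: 0 or 1 */
};

static inline struct side_idx side_opposite(struct side_idx s)
{
	s.sidei = !s.sidei;
	return s;
}
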