author     Stefano Brivio <sbrivio@redhat.com>  2025-11-03 11:16:12 +0100
committer  Stefano Brivio <sbrivio@redhat.com>  2025-12-08 04:47:46 +0100
commit     0cb8f90036540527133ad55a5976d2f639cce8da
tree       7a409deba0cea0186f5e9969b7a18b3027d63448
parent     68b0a36d6a40aece53df9d2a0a89addd07eda515
tcp, udp: Pad batched frames for vhost-user modes to 60 bytes (802.3 minimum)
For both TCP and UDP, instead of just expecting the first provided
buffer to be large enough to contain the headers we need (from 42
bytes for UDP data over IPv4 to 82 bytes for TCP with options over
IPv6), make sure that buffers are at least ETH_ZLEN (60 bytes) long.

This looks reasonable because no known vhost-user hypervisor
implementation hands us smaller buffers than that, and with IPv6 we
would anyway hit an assertion failure if a buffer were ever shorter
than 60 bytes.

At this point, all we need to do is pad the first (and only) buffer,
should data and headers use less than that.

Link: https://bugs.passt.top/show_bug.cgi?id=166
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
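For illustration only, not part of the patch: a minimal standalone
sketch of the padding idea, assuming Linux's <linux/if_ether.h> for
ETH_ZLEN. eth_pad() and its flat-buffer layout are hypothetical
stand-ins for the vu_pad() helper the patch adds to vu_common.c.

#include <string.h>
#include <linux/if_ether.h>	/* ETH_ZLEN: 802.3 minimum, 60 bytes */

/* eth_pad() - hypothetical, standalone equivalent of the new helper:
 *             zero-fill a frame up to ETH_ZLEN, assuming its buffer is
 *             at least that long, which is exactly the guarantee the
 *             patch enforces on the first vhost-user buffer
 */
static size_t eth_pad(char *frame, size_t l2len)
{
	if (l2len >= ETH_ZLEN)
		return l2len;	/* already at least minimum-sized */

	memset(frame + l2len, 0, ETH_ZLEN - l2len);
	return ETH_ZLEN;	/* padded up to the 802.3 minimum */
}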
 tcp_vu.c    | 17 ++++++++++++-----
 udp_vu.c    |  8 ++++++--
 vu_common.c | 14 ++++++++++++++
 vu_common.h |  1 +
 4 files changed, 33 insertions(+), 7 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index 1c81ce3..db9db78 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -91,12 +91,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
 	elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
-			      hdrlen + sizeof(struct tcp_syn_opts), NULL);
+			      MAX(hdrlen + sizeof(*opts), ETH_ZLEN), NULL);
 	if (elem_cnt != 1)
 		return -1;
 
 	ASSERT(flags_elem[0].in_sg[0].iov_len >=
-	       hdrlen + sizeof(struct tcp_syn_opts));
+	       MAX(hdrlen + sizeof(*opts), ETH_ZLEN));
 
 	vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);
@@ -138,6 +138,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	tcp_fill_headers(c, conn, NULL, eh, ip4h, ip6h, th, &payload,
 			 NULL, seq, !*c->pcap);
 
+	vu_pad(&flags_elem[0].in_sg[0], hdrlen + optlen);
+
 	if (*c->pcap) {
 		pcap_iov(&flags_elem[0].in_sg[0], 1,
 			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
@@ -211,7 +213,8 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
 		cnt = vu_collect(vdev, vq, &elem[elem_cnt],
 				 VIRTQUEUE_MAX_SIZE - elem_cnt,
-				 MIN(mss, fillsize) + hdrlen, &frame_size);
+				 MAX(MIN(mss, fillsize) + hdrlen, ETH_ZLEN),
+				 &frame_size);
 		if (cnt == 0)
 			break;
@@ -254,6 +257,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
 		len -= iov->iov_len;
 	}
+
 	/* adjust head count */
 	while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
 		(*head_cnt)--;
@@ -301,9 +305,9 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	struct ethhdr *eh;
 
 	/* we guess the first iovec provided by the guest can embed
-	 * all the headers needed by L2 frame
+	 * all the headers needed by L2 frame, including any padding
 	 */
-	ASSERT(iov[0].iov_len >= hdrlen);
+	ASSERT(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
 
 	eh = vu_eth(base);
@@ -456,6 +460,9 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
 
+	/* Pad first/single buffer only, it's at least ETH_ZLEN long */
+	vu_pad(iov, dlen + hdrlen);
+
 	if (*c->pcap) {
 		pcap_iov(iov, buf_cnt,
 			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
diff --git a/udp_vu.c b/udp_vu.c
index 099677f..c30dcf9 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -72,8 +72,8 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 {
 	const struct vu_dev *vdev = c->vdev;
 	int iov_cnt, idx, iov_used;
+	size_t off, hdrlen, l2len;
 	struct msghdr msg = { 0 };
-	size_t off, hdrlen;
 
 	ASSERT(!c->no_udp);
@@ -90,7 +90,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 		return 0;
 
 	/* reserve space for the headers */
-	ASSERT(iov_vu[0].iov_len >= hdrlen);
+	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
 	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
 	iov_vu[0].iov_len -= hdrlen;
@@ -116,6 +116,10 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 	iov_vu[idx].iov_len = off;
 	iov_used = idx + !!off;
 
+	/* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
+	l2len = *dlen + hdrlen - sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	vu_pad(&iov_vu[0], l2len);
+
 	vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used);
 
 	/* release unused buffers */
diff --git a/vu_common.c b/vu_common.c
index ce61fa6..c682498 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -293,3 +293,17 @@ err:
 	return -1;
 }
+
+/**
+ * vu_pad() - Pad 802.3 frame to minimum length (60 bytes) if needed
+ * @iov:	Buffer in iovec array where end of 802.3 frame is stored
+ * @l2len:	Layer-2 length already filled in frame
+ */
+void vu_pad(struct iovec *iov, size_t l2len)
+{
+	if (l2len >= ETH_ZLEN)
+		return;
+
+	memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
+	iov->iov_len += ETH_ZLEN - l2len;
+}
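For reference, a hedged usage sketch, not taken from the patch:
pad_example(), 'iov', and 'frame_len' are hypothetical. It mirrors the
udp_vu.c hunk above: the virtio-net header precedes the 802.3 frame in
the buffer but doesn't count towards the 60-byte minimum, so it is
subtracted before calling vu_pad().

#include <sys/uio.h>		/* struct iovec */
#include <linux/virtio_net.h>	/* struct virtio_net_hdr_mrg_rxbuf */

#include "vu_common.h"		/* vu_pad(), assuming passt's tree */

static void pad_example(struct iovec *iov, size_t frame_len)
{
	/* Layer-2 length: drop the vnet header from the total */
	size_t l2len = frame_len - sizeof(struct virtio_net_hdr_mrg_rxbuf);

	vu_pad(iov, l2len);	/* no-op once l2len >= ETH_ZLEN */
}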
diff --git a/vu_common.h b/vu_common.h
index c0883b2..27fe7e0 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -57,5 +57,6 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
 void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
 		const struct timespec *now);
 int vu_send_single(const struct ctx *c, const void *buf, size_t size);
+void vu_pad(struct iovec *iov, size_t l2len);
 
 #endif /* VU_COMMON_H */