| author | Stefano Brivio <sbrivio@redhat.com> | 2025-11-03 11:16:12 +0100 |
|---|---|---|
| committer | Stefano Brivio <sbrivio@redhat.com> | 2025-12-08 04:47:46 +0100 |
| commit | 0cb8f90036540527133ad55a5976d2f639cce8da (patch) | |
| tree | 7a409deba0cea0186f5e9969b7a18b3027d63448 | |
| parent | 68b0a36d6a40aece53df9d2a0a89addd07eda515 (diff) | |
tcp, udp: Pad batched frames for vhost-user modes to 60 bytes (802.3 minimum)
For both TCP and UDP, instead of just expecting the first provided
buffer to be large enough to contain the headers we need (from 42
bytes for UDP data over IPv4 to 82 bytes for TCP with options over
IPv6), strengthen that assumption and require buffers to be at least
ETH_ZLEN (60 bytes) in size.
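
For reference, a sketch of the arithmetic behind those figures (the 8 bytes of SYN options are inferred from the 82-byte total, matching the `struct tcp_syn_opts` used in the patch; none of these counts include the virtio-net header that precedes the frame in the buffer):

```c
/* UDP data over IPv4:         14 (Ethernet) + 20 (IPv4) +  8 (UDP)         = 42
 * TCP with options over IPv6: 14 (Ethernet) + 40 (IPv6) + 20 (TCP) + 8 (opts) = 82
 * ETH_ZLEN is the 802.3 minimum frame length, excluding the FCS.
 */
#include <linux/if_ether.h>

_Static_assert(ETH_ZLEN == 60, "802.3 minimum frame length, sans FCS");
_Static_assert(ETH_HLEN + 20 + 8 == 42, "UDP data over IPv4");
_Static_assert(ETH_HLEN + 40 + 20 + 8 == 82, "TCP with options over IPv6");
```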
This looks reasonable: no known vhost-user hypervisor implementation
hands us buffers smaller than that, and with IPv6 we would hit an
assertion failure anyway with buffers under 60 bytes, given that the
Ethernet (14 bytes) and IPv6 (40 bytes) headers plus any L4 header
already exceed that.
At this point, all we need to do is pad the first (and, in that case,
only) buffer whenever data and headers take up less than 60 bytes.
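
Concretely, the padding step zero-fills the tail of the first buffer up to ETH_ZLEN, which is what the new vu_pad() helper in the diff below does. A self-contained sketch of the same logic, assuming the buffer starts with a virtio-net header followed by the frame (pad_frame() and the demo in main() are illustrative, not part of the patch):

```c
#include <assert.h>
#include <string.h>
#include <sys/uio.h>
#include <linux/if_ether.h>	/* ETH_ZLEN (60), ETH_HLEN (14) */
#include <linux/virtio_net.h>	/* struct virtio_net_hdr_mrg_rxbuf */

/* Zero-fill the frame tail up to the 802.3 minimum. @iov describes
 * everything written so far (virtio-net header plus frame), @l2len is
 * the Ethernet frame length alone. Safe only because the backing
 * buffer is known to be at least vnet header + ETH_ZLEN bytes long,
 * which the patch guarantees by requesting ETH_ZLEN-sized buffers.
 */
static void pad_frame(struct iovec *iov, size_t l2len)
{
	if (l2len >= ETH_ZLEN)
		return;

	memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
	iov->iov_len += ETH_ZLEN - l2len;
}

int main(void)
{
	static char buf[sizeof(struct virtio_net_hdr_mrg_rxbuf) + ETH_ZLEN];
	size_t l2len = ETH_HLEN + 20 + 8;	/* empty UDP datagram, IPv4: 42 */
	struct iovec iov = {
		.iov_base = buf,
		.iov_len  = sizeof(struct virtio_net_hdr_mrg_rxbuf) + l2len,
	};

	pad_frame(&iov, l2len);

	/* The frame now spans exactly ETH_ZLEN bytes after the vnet header */
	assert(iov.iov_len == sizeof(struct virtio_net_hdr_mrg_rxbuf) + ETH_ZLEN);
	return 0;
}
```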
Link: https://bugs.passt.top/show_bug.cgi?id=166
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | tcp_vu.c | 17 |
| -rw-r--r-- | udp_vu.c | 8 |
| -rw-r--r-- | vu_common.c | 14 |
| -rw-r--r-- | vu_common.h | 1 |
4 files changed, 33 insertions, 7 deletions
```diff
diff --git a/tcp_vu.c b/tcp_vu.c
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -91,12 +91,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
 
 	elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
-			      hdrlen + sizeof(struct tcp_syn_opts), NULL);
+			      MAX(hdrlen + sizeof(*opts), ETH_ZLEN), NULL);
 	if (elem_cnt != 1)
 		return -1;
 
 	ASSERT(flags_elem[0].in_sg[0].iov_len >=
-	       hdrlen + sizeof(struct tcp_syn_opts));
+	       MAX(hdrlen + sizeof(*opts), ETH_ZLEN));
 
 	vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);
@@ -138,6 +138,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	tcp_fill_headers(c, conn, NULL, eh, ip4h, ip6h, th, &payload,
 			 NULL, seq, !*c->pcap);
 
+	vu_pad(&flags_elem[0].in_sg[0], hdrlen + optlen);
+
 	if (*c->pcap) {
 		pcap_iov(&flags_elem[0].in_sg[0], 1,
 			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
@@ -211,7 +213,8 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
 		cnt = vu_collect(vdev, vq, &elem[elem_cnt],
 				 VIRTQUEUE_MAX_SIZE - elem_cnt,
-				 MIN(mss, fillsize) + hdrlen, &frame_size);
+				 MAX(MIN(mss, fillsize) + hdrlen, ETH_ZLEN),
+				 &frame_size);
 		if (cnt == 0)
 			break;
@@ -254,6 +257,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
 			len -= iov->iov_len;
 		}
 
+		/* adjust head count */
 		while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
 			(*head_cnt)--;
@@ -301,9 +305,9 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	struct ethhdr *eh;
 
 	/* we guess the first iovec provided by the guest can embed
-	 * all the headers needed by L2 frame
+	 * all the headers needed by L2 frame, including any padding
 	 */
-	ASSERT(iov[0].iov_len >= hdrlen);
+	ASSERT(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
 
 	eh = vu_eth(base);
@@ -456,6 +460,9 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
+	/* Pad first/single buffer only, it's at least ETH_ZLEN long */
+	vu_pad(iov, dlen + hdrlen);
+
 	if (*c->pcap) {
 		pcap_iov(iov, buf_cnt,
 			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
diff --git a/udp_vu.c b/udp_vu.c
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -72,8 +72,8 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 {
 	const struct vu_dev *vdev = c->vdev;
 	int iov_cnt, idx, iov_used;
+	size_t off, hdrlen, l2len;
 	struct msghdr msg = { 0 };
-	size_t off, hdrlen;
 
 	ASSERT(!c->no_udp);
@@ -90,7 +90,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 		return 0;
 
 	/* reserve space for the headers */
-	ASSERT(iov_vu[0].iov_len >= hdrlen);
+	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
 	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
 	iov_vu[0].iov_len -= hdrlen;
@@ -116,6 +116,10 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 	iov_vu[idx].iov_len = off;
 	iov_used = idx + !!off;
 
+	/* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
+	l2len = *dlen + hdrlen - sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	vu_pad(&iov_vu[0], l2len);
+
 	vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used);
 
 	/* release unused buffers */
diff --git a/vu_common.c b/vu_common.c
index ce61fa6..c682498 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -293,3 +293,17 @@ err:
 
 	return -1;
 }
+
+/**
+ * vu_pad() - Pad 802.3 frame to minimum length (60 bytes) if needed
+ * @iov:	Buffer in iovec array where end of 802.3 frame is stored
+ * @l2len:	Layer-2 length already filled in frame
+ */
+void vu_pad(struct iovec *iov, size_t l2len)
+{
+	if (l2len >= ETH_ZLEN)
+		return;
+
+	memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
+	iov->iov_len += ETH_ZLEN - l2len;
+}
diff --git a/vu_common.h b/vu_common.h
index c0883b2..27fe7e0 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -57,5 +57,6 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
 void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
 		const struct timespec *now);
 int vu_send_single(const struct ctx *c, const void *buf, size_t size);
+void vu_pad(struct iovec *iov, size_t l2len);
 
 #endif /* VU_COMMON_H */
```
