diff options
| author | Stefano Brivio <sbrivio@redhat.com> | 2025-11-03 11:16:11 +0100 |
|---|---|---|
| committer | Stefano Brivio <sbrivio@redhat.com> | 2025-12-08 04:47:22 +0100 |
| commit | 68b0a36d6a40aece53df9d2a0a89addd07eda515 (patch) | |
| tree | 6d581684967a8077c7a9c8e684ea31cd9e762e0b | |
| parent | b9cd36fa1f306ed2a1cc5c1a0c38ce05202afaaa (diff) | |
| download | passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.tar passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.tar.gz passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.tar.bz2 passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.tar.lz passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.tar.xz passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.tar.zst passt-68b0a36d6a40aece53df9d2a0a89addd07eda515.zip | |
tcp, udp: Pad batched frames to 60 bytes (802.3 minimum) in non-vhost-user modes
Add a further iovec frame part, TCP_IOV_ETH_PAD for TCP and
UDP_IOV_ETH_PAD for UDP, after the payload, make that point to a
zero-filled buffer, and send out a part of it if needed to reach
the minimum frame length given by 802.3, that is, 60 bytes altogether.
The frames we might need to pad are IPv4 only (the IPv6 header is
larger), and are typically TCP ACK segments but can also be small
data segments or datagrams.
Link: https://bugs.passt.top/show_bug.cgi?id=166
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
| -rw-r--r-- | tcp.c | 2 | ||||
| -rw-r--r-- | tcp_buf.c | 23 | ||||
| -rw-r--r-- | tcp_internal.h | 2 | ||||
| -rw-r--r-- | udp.c | 33 | ||||
| -rw-r--r-- | util.c | 3 | ||||
| -rw-r--r-- | util.h | 3 |
6 files changed, 62 insertions, 4 deletions
@@ -1019,7 +1019,7 @@ void tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn, else tcp_update_csum(psum, th, payload); - tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); + tap_hdr_update(taph, MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN)); } /** @@ -96,6 +96,7 @@ void tcp_sock_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]); iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i]; + iov[TCP_IOV_ETH_PAD].iov_base = eth_pad; } } @@ -145,6 +146,22 @@ void tcp_payload_flush(const struct ctx *c) } /** + * tcp_l2_buf_pad() - Calculate padding to send out of padding (zero) buffer + * @iov: Pointer to iovec of frame parts we're about to send + */ +static void tcp_l2_buf_pad(struct iovec *iov) +{ + size_t l2len = iov[TCP_IOV_ETH].iov_len + + iov[TCP_IOV_IP].iov_len + + iov[TCP_IOV_PAYLOAD].iov_len; + + if (l2len < ETH_ZLEN) + iov[TCP_IOV_ETH_PAD].iov_len = ETH_ZLEN - l2len; + else + iov[TCP_IOV_ETH_PAD].iov_len = 0; +} + +/** * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers * @c: Execution context * @conn: Connection pointer @@ -212,6 +229,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) iov[TCP_IOV_PAYLOAD].iov_len = l4len; tcp_l2_buf_fill_headers(c, conn, iov, NULL, seq, false); + tcp_l2_buf_pad(iov); + if (flags & DUP_ACK) { struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used]; tcp_frame_conns[tcp_payload_used++] = conn; @@ -223,6 +242,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) memcpy(dup_iov[TCP_IOV_PAYLOAD].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, l4len); dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len; + dup_iov[TCP_IOV_ETH_PAD].iov_len = iov[TCP_IOV_ETH_PAD].iov_len; } if (tcp_payload_used > TCP_FRAMES_MEM - 2) @@ -270,6 +290,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, payload->th.psh = push; iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr); tcp_l2_buf_fill_headers(c, conn, iov, check, seq, false); + + tcp_l2_buf_pad(iov); + if (++tcp_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); } diff --git a/tcp_internal.h b/tcp_internal.h index 19e8922..5f8fb35 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -63,6 +63,7 @@ * @TCP_IOV_ETH Ethernet header * @TCP_IOV_IP IP (v4/v6) header * @TCP_IOV_PAYLOAD IP payload (TCP header + data) + * @TCP_IOV_ETH_PAD Ethernet (802.3) padding to 60 bytes * @TCP_NUM_IOVS the number of entries in the iovec array */ enum tcp_iov_parts { @@ -70,6 +71,7 @@ enum tcp_iov_parts { TCP_IOV_ETH = 1, TCP_IOV_IP = 2, TCP_IOV_PAYLOAD = 3, + TCP_IOV_ETH_PAD = 4, TCP_NUM_IOVS }; @@ -168,6 +168,7 @@ udp_meta[UDP_MAX_FRAMES]; * @UDP_IOV_ETH Ethernet header * @UDP_IOV_IP IP (v4/v6) header * @UDP_IOV_PAYLOAD IP payload (UDP header + data) + * @UDP_IOV_ETH_PAD Ethernet (802.3) padding to 60 bytes * @UDP_NUM_IOVS the number of entries in the iovec array */ enum udp_iov_idx { @@ -175,6 +176,7 @@ enum udp_iov_idx { UDP_IOV_ETH, UDP_IOV_IP, UDP_IOV_PAYLOAD, + UDP_IOV_ETH_PAD, UDP_NUM_IOVS, }; @@ -239,6 +241,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]); tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph); tiov[UDP_IOV_PAYLOAD].iov_base = payload; + tiov[UDP_IOV_ETH_PAD].iov_base = eth_pad; mh->msg_iov = siov; mh->msg_iovlen = 1; @@ -345,6 +348,22 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, } /** + * udp_tap_pad() - Calculate padding to send out of padding (zero) buffer + * @iov: Pointer to iovec of frame parts we're about to send + */ +static void udp_tap_pad(struct iovec *iov) +{ + size_t l2len = iov[UDP_IOV_ETH].iov_len + + iov[UDP_IOV_IP].iov_len + + iov[UDP_IOV_PAYLOAD].iov_len; + + if (l2len < ETH_ZLEN) + iov[UDP_IOV_ETH_PAD].iov_len = ETH_ZLEN - l2len; + else + iov[UDP_IOV_ETH_PAD].iov_len = 0; +} + +/** * udp_tap_prepare() - Convert one datagram into a tap frame * @mmh: Receiving mmsghdr array * @idx: Index of the datagram to prepare @@ -362,23 +381,31 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base; struct udp_payload_t *bp = &udp_payload[idx]; struct udp_meta_t *bm = &udp_meta[idx]; - size_t l4len; + size_t l4len, l2len; eth_update_mac(eh, NULL, tap_omac); if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) { l4len = udp_update_hdr6(&bm->ip6h, bp, toside, mmh[idx].msg_len, no_udp_csum); - tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + ETH_HLEN); + + l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN); + tap_hdr_update(&bm->taph, l2len); + eh->h_proto = htons_constant(ETH_P_IPV6); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h); } else { l4len = udp_update_hdr4(&bm->ip4h, bp, toside, mmh[idx].msg_len, no_udp_csum); - tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + ETH_HLEN); + + l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN); + tap_hdr_update(&bm->taph, l2len); + eh->h_proto = htons_constant(ETH_P_IP); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h); } (*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len; + + udp_tap_pad(*tap_iov); } /** @@ -39,6 +39,9 @@ #include <sys/random.h> #endif +/* Zero-filled buffer to pad 802.3 frames, up to 60 (ETH_ZLEN) bytes */ +uint8_t eth_pad[ETH_ZLEN] = { 0 }; + /** * sock_l4_() - Create and bind socket to socket address * @c: Execution context @@ -17,6 +17,7 @@ #include <arpa/inet.h> #include <unistd.h> #include <sys/syscall.h> +#include <net/ethernet.h> #include "log.h" @@ -152,6 +153,8 @@ void abort_with_msg(const char *fmt, ...) #define ntohll(x) (be64toh((x))) #define htonll(x) (htobe64((x))) +extern uint8_t eth_pad[ETH_ZLEN]; + /** * ntohl_unaligned() - Read 32-bit BE value from a possibly unaligned address * @p: Pointer to the BE value in memory |
