From 37c228ada88b7fa0001659b13c34a783ba75df83 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 28 Mar 2022 16:56:01 +0200 Subject: tap, tcp, udp, icmp: Cut down on some oversized buffers The existing sizes provide no measurable differences in throughput and packet rates at this point. They were probably needed as batched implementations were not complete, but they can be decreased quite a bit now. Signed-off-by: Stefano Brivio --- icmp.c | 12 ++++++++++-- passt.h | 2 +- tap.c | 24 +++++++++++++----------- tcp.c | 51 +++++++++++++++++++++++++++++++++++++++++---------- tcp_splice.c | 10 +++++----- udp.c | 4 ++-- 6 files changed, 72 insertions(+), 31 deletions(-) diff --git a/icmp.c b/icmp.c index 94093d2..0eb5bfe 100644 --- a/icmp.c +++ b/icmp.c @@ -43,13 +43,13 @@ /** * struct icmp_id_sock - Tracking information for single ICMP echo identifier * @sock: Bound socket for identifier - * @ts: Last associated activity from tap, seconds * @seq: Last sequence number sent to tap, host order + * @ts: Last associated activity from tap, seconds */ struct icmp_id_sock { int sock; - time_t ts; uint16_t seq; + time_t ts; }; /* Indexed by ICMP echo identifier */ @@ -168,6 +168,10 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr, s = sock_l4(c, AF_INET, IPPROTO_ICMP, id, 0, iref.u32); if (s < 0) goto fail_sock; + if (s > SOCKET_MAX) { + close(s); + return 1; + } icmp_id_map[V4][id].sock = s; } @@ -201,6 +205,10 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr, iref.u32); if (s < 0) goto fail_sock; + if (s > SOCKET_MAX) { + close(s); + return 1; + } icmp_id_map[V6][id].sock = s; } diff --git a/passt.h b/passt.h index 7d40c6f..69e334d 100644 --- a/passt.h +++ b/passt.h @@ -63,7 +63,7 @@ union epoll_ref { }; #define TAP_BUF_BYTES \ - ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 256), PAGE_SIZE) + ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE) #define TAP_BUF_FILL (TAP_BUF_BYTES - ETH_MAX_MTU - sizeof(uint32_t)) #define TAP_MSGS \ DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t)) diff --git a/tap.c b/tap.c index 6333015..aca8c75 100644 --- a/tap.c +++ b/tap.c @@ -57,6 +57,8 @@ static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf); static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf); +#define TAP_SEQS 128 /* Different L4 tuples in one batch */ + /** * tap_send() - Send frame, with qemu socket header if needed * @c: Execution context @@ -225,7 +227,7 @@ static struct tap4_l4_t { uint32_t daddr; struct pool_l4_t p; -} tap4_l4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */]; +} tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; /** * struct l4_seq6_t - Message sequence for one protocol handler call, IPv6 @@ -247,7 +249,7 @@ static struct tap6_l4_t { struct in6_addr daddr; struct pool_l4_t p; -} tap6_l4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */]; +} tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; /** * tap_packet_debug() - Print debug message for packet(s) from guest/tap @@ -401,12 +403,12 @@ resume: seq->daddr = iph->daddr; \ } while (0) - if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV) + if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < TAP_SEQS) goto append; for (seq = tap4_l4 + seq_count - 1; seq >= tap4_l4; seq--) { if (L4_MATCH(iph, uh, seq)) { - if (seq->p.count >= UIO_MAXIOV) + if (seq->p.count >= TAP_SEQS) seq = NULL; break; } @@ -424,7 +426,7 @@ resume: append: packet_add((struct pool *)&seq->p, l4_len, l4h); - if (seq_count == UIO_MAXIOV) + if (seq_count == TAP_SEQS) break; /* Resume after flushing if i < count */ } @@ -563,12 +565,12 @@ resume: } while (0) if (seq && L4_MATCH(ip6h, proto, uh, seq) && - seq->p.count < UIO_MAXIOV) + seq->p.count < TAP_SEQS) goto append; for (seq = tap6_l4 + seq_count - 1; seq >= tap6_l4; seq--) { if (L4_MATCH(ip6h, proto, uh, seq)) { - if (seq->p.count >= UIO_MAXIOV) + if (seq->p.count >= TAP_SEQS) seq = NULL; break; } @@ -586,7 +588,7 @@ resume: append: packet_add((struct pool *)&seq->p, l4_len, l4h); - if (seq_count == UIO_MAXIOV) + if (seq_count == TAP_SEQS) break; /* Resume after flushing if i < count */ } @@ -924,9 +926,9 @@ void tap_sock_init(struct ctx *c) pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, pkt_buf, sz); pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, pkt_buf, sz); - for (i = 0; i < UIO_MAXIOV; i++) { - tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, pkt_buf, sz); - tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, pkt_buf, sz); + for (i = 0; i < TAP_SEQS; i++) { + tap4_l4[i].p = PACKET_INIT(pool_l4, TAP_SEQS, pkt_buf, sz); + tap6_l4[i].p = PACKET_INIT(pool_l4, TAP_SEQS, pkt_buf, sz); } if (c->fd_tap != -1) { diff --git a/tcp.c b/tcp.c index fe46ede..2194067 100644 --- a/tcp.c +++ b/tcp.c @@ -70,9 +70,9 @@ * * Data needs to linger on sockets as long as it's not acknowledged by the * guest, and is read using MSG_PEEK into preallocated static buffers sized - * to the maximum supported window, 64MiB ("discard" buffer, for already-sent + * to the maximum supported window, 16 MiB ("discard" buffer, for already-sent * data) plus a number of maximum-MSS-sized buffers. This imposes a practical - * limitation on window scaling, that is, the maximum factor is 1024. Larger + * limitation on window scaling, that is, the maximum factor is 256. Larger * factors will be accepted, but resulting, larger values are never advertised * to the other side, and not used while queueing data. * @@ -299,7 +299,7 @@ #include "conf.h" #include "tcp_splice.h" -#define TCP_FRAMES_MEM 256 +#define TCP_FRAMES_MEM 128 #define TCP_FRAMES \ (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) @@ -311,17 +311,48 @@ #define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \ TCP_HASH_TABLE_LOAD) -#define MAX_WS 10 +#define MAX_WS 8 #define MAX_WINDOW (1 << (16 + (MAX_WS))) /* MSS rounding: see SET_MSS() */ #define MSS_DEFAULT 536 -#define MSS4 ROUND_DOWN(USHRT_MAX - \ - sizeof(uint32_t) - sizeof(struct ethhdr) - \ - sizeof(struct iphdr) - sizeof(struct tcphdr), 4) -#define MSS6 ROUND_DOWN(USHRT_MAX - \ - sizeof(uint32_t) - sizeof(struct ethhdr) - \ - sizeof(struct ipv6hdr) - sizeof(struct tcphdr), 4) + +struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */ + uint32_t psum; + uint32_t tsum; +#ifdef __AVX2__ + uint8_t pad[18]; +#else + uint8_t pad[2]; +#endif + uint32_t vnet_len; + struct ethhdr eh; + struct iphdr iph; + struct tcphdr th; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))); +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); +#endif + +struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ +#ifdef __AVX2__ + uint8_t pad[14]; +#else + uint8_t pad[2]; +#endif + uint32_t vnet_len; + struct ethhdr eh; + struct ipv6hdr ip6h; + struct tcphdr th; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))); +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); +#endif + +#define MSS4 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp4_l2_head), 4) +#define MSS6 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp6_l2_head), 4) #define WINDOW_DEFAULT 14600 /* RFC 6928 */ #ifdef HAS_SND_WND diff --git a/tcp_splice.c b/tcp_splice.c index 714571c..3f2ef2e 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -102,10 +102,10 @@ struct tcp_splice_conn { #define RCVLOWAT_ACT_B BIT(5) #define CLOSING BIT(6) - uint64_t a_read; - uint64_t a_written; - uint64_t b_read; - uint64_t b_written; + uint32_t a_read; + uint32_t a_written; + uint32_t b_read; + uint32_t b_written; }; #define CONN_V6(x) (x->flags & SOCK_V6) @@ -553,7 +553,7 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref, { uint8_t lowat_set_flag, lowat_act_flag; int from, to, *pipes, eof, never_read; - uint64_t *seq_read, *seq_write; + uint32_t *seq_read, *seq_write; struct tcp_splice_conn *conn; if (ref.r.p.tcp.tcp.listen) { diff --git a/udp.c b/udp.c index 2cdbe6c..1c0fdc6 100644 --- a/udp.c +++ b/udp.c @@ -117,8 +117,8 @@ #include "pcap.h" #define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ -#define UDP_SPLICE_FRAMES 128 -#define UDP_TAP_FRAMES_MEM 128 +#define UDP_SPLICE_FRAMES 32 +#define UDP_TAP_FRAMES_MEM 32 #define UDP_TAP_FRAMES (c->mode == MODE_PASST ? UDP_TAP_FRAMES_MEM : 1) /** -- cgit v1.2.3