From 683043e2001e71e0b7d0b132da4756d329f22f27 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 4 Oct 2021 22:08:24 +0200 Subject: tcp: Probe net.core.{r,w}mem_max, don't set SO_{RCV,SND}BUF if low If net.core.rmem_max and net.core.wmem_max sysctls have low values, we can get bigger buffers by not trying to set them high -- the kernel would lock their values to what we get. Try, instead, to get bigger buffers by queueing as much as possible, and if maximum values in tcp_wmem and tcp_rmem are bigger than this, that will work. While at it, drop QUICKACK option for non-spliced sockets, I set that earlier by mistake. Signed-off-by: Stefano Brivio --- tcp.c | 38 ++++++++++++++++++++++---------------- tcp.h | 4 ++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/tcp.c b/tcp.c index e001df4..b33df1e 100644 --- a/tcp.c +++ b/tcp.c @@ -807,15 +807,18 @@ static void tcp_get_sndbuf(struct tcp_tap_conn *conn) * tcp_sock_set_bufsize() - Set SO_RCVBUF and SO_SNDBUF to maximum values * @s: Socket, can be -1 to avoid check in the caller */ -static void tcp_sock_set_bufsize(int s) +static void tcp_sock_set_bufsize(struct ctx *c, int s) { int v = INT_MAX / 2; /* Kernel clamps and rounds, no need to check */ if (s == -1) return; - setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)); - setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)); + if (!c->tcp.low_rmem) + setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)); + + if (!c->tcp.low_wmem) + setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)); } /** @@ -1308,7 +1311,8 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags, else mss -= sizeof(struct ipv6hdr); - if (!conn->local && !tcp_rtt_dst_low(conn)) + if (c->tcp.low_wmem && + !conn->local && !tcp_rtt_dst_low(conn)) mss = MIN(mss, PAGE_SIZE); else mss = ROUND_DOWN(mss, PAGE_SIZE); @@ -1571,7 +1575,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr, if (s < 0) return; - tcp_sock_set_bufsize(s); + tcp_sock_set_bufsize(c, s); if (af == AF_INET && addr4.sin_addr.s_addr == c->gw4) addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); @@ -2560,13 +2564,15 @@ static int tcp_splice_connect(struct ctx *c, struct tcp_splice_conn *conn, .sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) }, }; const struct sockaddr *sa; + int ret, one = 1; socklen_t sl; - int ret; conn->to = sock_conn; if (s <= 0) - tcp_sock_set_bufsize(sock_conn); + tcp_sock_set_bufsize(c, sock_conn); + + setsockopt(s, SOL_TCP, TCP_QUICKACK, &one, sizeof(one)); if (v6) { sa = (struct sockaddr *)&addr6; @@ -3157,7 +3163,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port) c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY, tref.u32); if (s > 0) - tcp_sock_set_bufsize(s); + tcp_sock_set_bufsize(c, s); else s = -1; @@ -3170,7 +3176,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port) s = sock_l4(c, AF_INET, IPPROTO_TCP, port, BIND_LOOPBACK, tref.u32); if (s > 0) - tcp_sock_set_bufsize(s); + tcp_sock_set_bufsize(c, s); else s = -1; @@ -3192,7 +3198,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port) c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY, tref.u32); if (s > 0) - tcp_sock_set_bufsize(s); + tcp_sock_set_bufsize(c, s); else s = -1; @@ -3205,7 +3211,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port) s = sock_l4(c, AF_INET6, IPPROTO_TCP, port, BIND_LOOPBACK, tref.u32); if (s > 0) - tcp_sock_set_bufsize(s); + tcp_sock_set_bufsize(c, s); else s = -1; @@ -3287,7 +3293,7 @@ struct tcp_sock_refill_arg { static int tcp_sock_refill(void *arg) { struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg; - int i, *p4, *p6, one = 1; + int i, *p4, *p6; if (a->ns) { if (ns_enter(a->c->pasta_pid)) @@ -3304,8 +3310,7 @@ static int tcp_sock_refill(void *arg) break; } *p4 = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP); - setsockopt(*p4, SOL_TCP, TCP_QUICKACK, &one, sizeof(one)); - tcp_sock_set_bufsize(*p4); + tcp_sock_set_bufsize(a->c, *p4); } for (i = 0; a->c->v6 && i < TCP_SOCK_POOL_SIZE; i++, p6++) { @@ -3314,8 +3319,7 @@ static int tcp_sock_refill(void *arg) } *p6 = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP); - setsockopt(*p6, SOL_TCP, TCP_QUICKACK, &one, sizeof(one)); - tcp_sock_set_bufsize(*p6); + tcp_sock_set_bufsize(a->c, *p6); } return 0; @@ -3334,6 +3338,8 @@ int tcp_sock_init(struct ctx *c, struct timespec *now) getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); + tcp_probe_mem(c); + for (port = 0; port < USHRT_MAX; port++) { if (!bitmap_isset(c->tcp.port_to_tap, port)) continue; diff --git a/tcp.h b/tcp.h index ef78b51..fd483a1 100644 --- a/tcp.h +++ b/tcp.h @@ -51,6 +51,8 @@ union tcp_epoll_ref { * @pipe_size: Size of pipes for spliced connections * @refill_ts: Time of last refill operation for pools of sockets/pipes * @port_detect_ts: Time of last TCP port detection/rebind, if enabled + * @low_wmem: Low probed net.core.wmem_max + * @low_rmem: Low probed net.core.rmem_max */ struct tcp_ctx { uint64_t hash_secret[2]; @@ -65,6 +67,8 @@ struct tcp_ctx { size_t pipe_size; struct timespec refill_ts; struct timespec port_detect_ts; + int low_wmem; + int low_rmem; }; #endif /* TCP_H */ -- cgit v1.2.3