From d4d61480b6883d462a2c3c99eaf315259e78c984 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 5 Oct 2021 19:27:04 +0200 Subject: tcp, tap: Turn tcp_probe_mem() into sock_probe_mem(), use for AF_UNIX socket too Signed-off-by: Stefano Brivio --- passt.c | 1 + passt.h | 5 +++++ tap.c | 8 +++++++- tcp.c | 38 +++----------------------------------- tcp.h | 4 ---- util.c | 28 ++++++++++++++++++++++++++++ util.h | 5 +++++ 7 files changed, 49 insertions(+), 40 deletions(-) diff --git a/passt.c b/passt.c index 1f5f60d..1dc1cca 100644 --- a/passt.c +++ b/passt.c @@ -374,6 +374,7 @@ int main(int argc, char **argv) perror("setrlimit"); exit(EXIT_FAILURE); } + sock_probe_mem(&c); proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4); diff --git a/passt.h b/passt.h index 57b5c09..895dd91 100644 --- a/passt.h +++ b/passt.h @@ -126,6 +126,8 @@ enum passt_modes { * @no_dhcpv6: Disable DHCPv6 server * @no_ndp: Disable NDP handler altogether * @no_ra: Disable router advertisements + * @low_wmem: Low probed net.core.wmem_max + * @low_rmem: Low probed net.core.rmem_max */ struct ctx { enum passt_modes mode; @@ -177,6 +179,9 @@ struct ctx { int no_dhcpv6; int no_ndp; int no_ra; + + int low_wmem; + int low_rmem; }; void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, diff --git a/tap.c b/tap.c index f395227..8d8f381 100644 --- a/tap.c +++ b/tap.c @@ -773,7 +773,7 @@ static void tap_sock_init_unix(struct ctx *c) struct sockaddr_un addr = { .sun_family = AF_UNIX, }; - int i, ret; + int i, ret, v = INT_MAX / 2; if (c->fd_tap_listen) close(c->fd_tap_listen); @@ -833,6 +833,12 @@ static void tap_sock_init_unix(struct ctx *c) addr.sun_path); c->fd_tap = accept(fd, NULL, NULL); + + if (!c->low_rmem) + setsockopt(c->fd_tap, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)); + + if (!c->low_wmem) + setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)); } static int tun_ns_fd = -1; diff --git a/tcp.c b/tcp.c index cbfad28..e4c7e39 100644 --- a/tcp.c +++ b/tcp.c @@ -341,9 +341,6 @@ #define TCP_TAP_FRAMES 32 -#define RCVBUF_BIG (2 * 1024 * 1024) -#define SNDBUF_BIG (2 * 1024 * 1024) -#define SNDBUF_SMALL (128 * 1024) #define MAX_PIPE_SIZE (2 * 1024 * 1024) #define TCP_HASH_TABLE_LOAD 70 /* % */ @@ -753,33 +750,6 @@ static void tcp_splice_state(struct tcp_splice_conn *conn, enum tcp_state state) conn->state = state; } -/** - * tcp_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed - * @c: Execution context - */ -static void tcp_probe_mem(struct ctx *c) -{ - int v = INT_MAX / 2, s; - socklen_t sl; - - if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { - c->tcp.low_wmem = c->tcp.low_rmem = 1; - return; - } - - sl = sizeof(v); - if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)) || - getsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, &sl) || v < SNDBUF_BIG) - c->tcp.low_wmem = 1; - - v = INT_MAX / 2; - if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)) || - getsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, &sl) || v < RCVBUF_BIG) - c->tcp.low_rmem = 1; - - close(s); -} - /** * tcp_get_sndbuf() - Get, scale SO_SNDBUF between thresholds (1 to 0.5 usage) * @conn: Connection pointer @@ -814,10 +784,10 @@ static void tcp_sock_set_bufsize(struct ctx *c, int s) if (s == -1) return; - if (!c->tcp.low_rmem) + if (!c->low_rmem) setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)); - if (!c->tcp.low_wmem) + if (!c->low_wmem) setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)); } @@ -1325,7 +1295,7 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags, else mss -= sizeof(struct ipv6hdr); - if (c->tcp.low_wmem && + if (c->low_wmem && !conn->local && !tcp_rtt_dst_low(conn)) mss = MIN(mss, PAGE_SIZE); else @@ -3342,8 +3312,6 @@ int tcp_sock_init(struct ctx *c, struct timespec *now) getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); - tcp_probe_mem(c); - for (port = 0; port < USHRT_MAX; port++) { if (!bitmap_isset(c->tcp.port_to_tap, port)) continue; diff --git a/tcp.h b/tcp.h index fd483a1..ef78b51 100644 --- a/tcp.h +++ b/tcp.h @@ -51,8 +51,6 @@ union tcp_epoll_ref { * @pipe_size: Size of pipes for spliced connections * @refill_ts: Time of last refill operation for pools of sockets/pipes * @port_detect_ts: Time of last TCP port detection/rebind, if enabled - * @low_wmem: Low probed net.core.wmem_max - * @low_rmem: Low probed net.core.rmem_max */ struct tcp_ctx { uint64_t hash_secret[2]; @@ -67,8 +65,6 @@ struct tcp_ctx { size_t pipe_size; struct timespec refill_ts; struct timespec port_detect_ts; - int low_wmem; - int low_rmem; }; #endif /* TCP_H */ diff --git a/util.c b/util.c index 3cf3a82..66b088a 100644 --- a/util.c +++ b/util.c @@ -212,6 +212,34 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, return fd; } +/** + * sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed + * @c: Execution context + */ +void sock_probe_mem(struct ctx *c) +{ + int v = INT_MAX / 2, s; + socklen_t sl; + + if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + c->low_wmem = c->low_rmem = 1; + return; + } + + sl = sizeof(v); + if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)) || + getsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, &sl) || v < SNDBUF_BIG) + c->low_wmem = 1; + + v = INT_MAX / 2; + if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)) || + getsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, &sl) || v < RCVBUF_BIG) + c->low_rmem = 1; + + close(s); +} + + /** * timespec_diff_ms() - Report difference in milliseconds between two timestamps * @a: Minuend timestamp diff --git a/util.h b/util.h index 7efde7b..fdb0ef0 100644 --- a/util.h +++ b/util.h @@ -116,6 +116,10 @@ void debug(const char *format, ...); .daddr = IN6ADDR_ANY_INIT, \ } +#define RCVBUF_BIG (2 * 1024 * 1024) +#define SNDBUF_BIG (4 * 1024 * 1024) +#define SNDBUF_SMALL (128 * 1024) + #include #include #include @@ -133,6 +137,7 @@ struct ctx; char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto); int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, enum bind_type bind_addr, uint32_t data); +void sock_probe_mem(struct ctx *c); int timespec_diff_ms(struct timespec *a, struct timespec *b); void bitmap_set(uint8_t *map, int bit); void bitmap_clear(uint8_t *map, int bit); -- cgit v1.2.3