From db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sun, 25 Apr 2021 13:34:04 +0200 Subject: tcp: Avoid SO_ACCEPTCONN getsockopt() by noting listening/data sockets numbers ...the rest is reshuffling existing macros to use the bits we need in TCP code. Signed-off-by: Stefano Brivio --- passt.c | 51 +++++++++++++++++++++++++-------------------------- tcp.c | 46 ++++++++++++++++++++++++++++++---------------- tcp.h | 8 ++++++++ util.c | 19 +++++-------------- util.h | 19 +++++++++++++++++++ 5 files changed, 87 insertions(+), 56 deletions(-) diff --git a/passt.c b/passt.c index 2fc88cf..3a56f67 100644 --- a/passt.c +++ b/passt.c @@ -613,40 +613,39 @@ static int tap_handler(struct ctx *c) /** * sock_handler() - Event handler for L4 sockets * @c: Execution context - * @fd: File descriptor associated to event + * @s: Socket associated to event * @events epoll events */ -static void sock_handler(struct ctx *c, int fd, uint32_t events) +static void sock_handler(struct ctx *c, int s, uint32_t events) { socklen_t sl; - int so; - - sl = sizeof(so); - -#define IN(x, proto) (x >= c->proto.fd_min && x <= c->proto.fd_max) - - if (IN(fd, udp) && !IN(fd, icmp) && !IN(fd, tcp)) - so = IPPROTO_UDP; - else if (IN(fd, tcp) && !IN(fd, icmp) && !IN(fd, udp)) - so = IPPROTO_TCP; - else if (IN(fd, icmp) && !IN(fd, udp) && !IN(fd, tcp)) - so = IPPROTO_ICMP; /* Fits ICMPv6 below, too */ - else if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &so, &sl)) { - epoll_ctl(c->epollfd, EPOLL_CTL_DEL, fd, NULL); - close(fd); + int proto; + + sl = sizeof(proto); + + if ( FD_PROTO(s, udp) && !FD_PROTO(s, icmp) && !FD_PROTO(s, tcp)) + proto = IPPROTO_UDP; + else if (FD_PROTO(s, tcp) && !FD_PROTO(s, icmp) && !FD_PROTO(s, udp)) + proto = IPPROTO_TCP; + else if (FD_PROTO(s, icmp) && !FD_PROTO(s, udp) && !FD_PROTO(s, tcp)) + proto = IPPROTO_ICMP; /* Fits ICMPv6 below, too */ + else if (getsockopt(s, SOL_SOCKET, SO_PROTOCOL, &proto, &sl)) + proto = -1; + + if (proto == -1) { + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, s, NULL); + close(s); return; } -#undef IN - - debug("%s: packet from socket %i", getprotobynumber(so)->p_name, fd); + debug("%s: packet from socket %i", getprotobynumber(proto)->p_name, s); - if (so == IPPROTO_ICMP || so == IPPROTO_ICMPV6) - icmp_sock_handler(c, fd, events); - else if (so == IPPROTO_TCP) - tcp_sock_handler(c, fd, events); - else if (so == IPPROTO_UDP) - udp_sock_handler(c, fd, events); + if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) + icmp_sock_handler(c, s, events); + else if (proto == IPPROTO_TCP) + tcp_sock_handler(c, s, events); + else if (proto == IPPROTO_UDP) + udp_sock_handler(c, s, events); } /** diff --git a/tcp.c b/tcp.c index 1713dbc..5ba8f3f 100644 --- a/tcp.c +++ b/tcp.c @@ -1003,10 +1003,8 @@ static void tcp_conn_from_sock(struct ctx *c, int fd) if (s == -1) return; - if (s < c->tcp.fd_min) - c->tcp.fd_min = s; - if (s > c->tcp.fd_max) - c->tcp.fd_max = s; + CHECK_SET_MIN_MAX(c->tcp.fd_, s); + CHECK_SET_MIN_MAX(c->tcp.fd_conn_, s); if (sa_l.ss_family == AF_INET) { struct sockaddr_in *sa4 = (struct sockaddr_in *)&sa_r; @@ -1406,7 +1404,7 @@ static void tcp_connect_finish(struct ctx *c, int s) void tcp_sock_handler(struct ctx *c, int s, uint32_t events) { socklen_t sl; - int so; + int accept; if (tc[s].s == LAST_ACK) { tcp_send_to_tap(c, s, ACK, NULL, 0); @@ -1414,21 +1412,28 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events) return; } + if (tc[s].s == SOCK_SYN_SENT) { /* This can only be a socket error or a shutdown from remote */ tcp_rst(c, s); return; } - - sl = sizeof(so); - if ((events & EPOLLERR) || - getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &so, &sl)) { + if (IN_INTERVAL(c->tcp.fd_listen_min, c->tcp.fd_listen_max, s) && + !IN_INTERVAL(c->tcp.fd_conn_min, c->tcp.fd_conn_max, s)) + accept = 1; + else if (IN_INTERVAL(c->tcp.fd_conn_min, c->tcp.fd_conn_max, s) && + !IN_INTERVAL(c->tcp.fd_listen_min, c->tcp.fd_listen_max, s)) + accept = 0; + else if (getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &accept, &sl)) + accept = -1; + + if ((events & EPOLLERR) || accept == -1) { if (tc[s].s != CLOSED) tcp_rst(c, s); return; } - if (so) { + if (accept) { tcp_conn_from_sock(c, s); return; } @@ -1466,15 +1471,24 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events) int tcp_sock_init(struct ctx *c) { in_port_t port; + int s = 0; - c->tcp.fd_min = INT_MAX; - c->tcp.fd_max = 0; + c->tcp.fd_min = c->tcp.fd_listen_min = c->tcp.fd_conn_min = INT_MAX; + c->tcp.fd_max = c->tcp.fd_listen_max = c->tcp.fd_conn_max = 0; + CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s); for (port = 0; port < (1 << 15) + (1 << 14); port++) { - if (c->v4 && sock_l4_add(c, 4, IPPROTO_TCP, port) < 0) - return -1; - if (c->v6 && sock_l4_add(c, 6, IPPROTO_TCP, port) < 0) - return -1; + if (c->v4) { + if ((s = sock_l4_add(c, 4, IPPROTO_TCP, port)) < 0) + return -1; + CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s); + } + + if (c->v6) { + if ((s = sock_l4_add(c, 6, IPPROTO_TCP, port)) < 0) + return -1; + CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s); + } } getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); diff --git a/tcp.h b/tcp.h index 2a79a75..4604281 100644 --- a/tcp.h +++ b/tcp.h @@ -14,11 +14,19 @@ void tcp_timer(struct ctx *c, struct timespec *ts); * @hash_secret: 128-bit secret for hash functions, ISN and hash table * @fd_min: Lowest file descriptor number for TCP ever used * @fd_max: Highest file descriptor number for TCP ever used + * @fd_listen_min: Lowest file descriptor number for listening sockets + * @fd_listen_max: Highest file descriptor number for listening sockets + * @fd_conn_min: Lowest file descriptor number for connected sockets + * @fd_conn_max: Highest file descriptor number for connected sockets */ struct tcp_ctx { uint64_t hash_secret[2]; int fd_min; int fd_max; + int fd_listen_min; + int fd_listen_max; + int fd_conn_min; + int fd_conn_max; }; #endif /* TCP_H */ diff --git a/util.c b/util.c index cc96a1a..acd62a9 100644 --- a/util.c +++ b/util.c @@ -25,6 +25,7 @@ #include #include "passt.h" +#include "util.h" #define logfn(name, level) \ void name(const char *format, ...) { \ @@ -189,20 +190,10 @@ int sock_l4_add(struct ctx *c, int v, uint16_t proto, uint16_t port) return -1; } -#define CHECK_SET_MIN_MAX(ipproto, proto_ctx, fd) \ - if (proto == (ipproto)) { \ - if (fd < c->proto_ctx.fd_min) \ - c->proto_ctx.fd_min = (fd); \ - if (fd > c->proto_ctx.fd_max) \ - c->proto_ctx.fd_max = (fd); \ - } - - CHECK_SET_MIN_MAX(IPPROTO_ICMP, icmp, fd); - CHECK_SET_MIN_MAX(IPPROTO_ICMPV6, icmp, fd); - CHECK_SET_MIN_MAX(IPPROTO_TCP, tcp, fd); - CHECK_SET_MIN_MAX(IPPROTO_UDP, udp, fd); - -#undef CHECK_SET_MIN_MAX + CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_ICMP, icmp, fd); + CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_ICMPV6, icmp, fd); + CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_TCP, tcp, fd); + CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_UDP, udp, fd); if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) goto epoll_add; diff --git a/util.h b/util.h index 139638c..34b8266 100644 --- a/util.h +++ b/util.h @@ -8,6 +8,25 @@ void debug(const char *format, ...); #define debug(...) { } #endif +#define CHECK_SET_MIN_MAX(basename, fd) \ + do { \ + if ((fd) < basename##min) \ + basename##min = (fd); \ + if ((fd) > basename##max) \ + basename##max = (fd); \ + } while (0) + +#define CHECK_SET_MIN_MAX_PROTO_FD(proto, ipproto, proto_ctx, fd) \ + do { \ + if ((proto) == (ipproto)) \ + CHECK_SET_MIN_MAX(c->proto_ctx.fd_, (fd)); \ + } while (0) + +#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b)) + +#define FD_PROTO(x, proto) \ + (IN_INTERVAL(c->proto.fd_min, c->proto.fd_max, (x))) + uint16_t csum_fold(uint32_t sum); uint16_t csum_ip4(void *buf, size_t len); void csum_tcp4(struct iphdr *iph); -- cgit v1.2.3