about git code bugs lists chat
diff options
context:
space:
mode:
author Stefano Brivio <sbrivio@redhat.com> 2021-04-25 13:34:04 +0200
committer Stefano Brivio <sbrivio@redhat.com> 2021-04-29 17:15:26 +0200
commit db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d (patch)
tree 72c07fb27d823acb1f48ca70284a7298c1060d4d
parent 48afbe321eddfb68966a4436884c022e64c3e166 (diff)
download passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.tar
passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.tar.gz
passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.tar.bz2
passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.tar.lz
passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.tar.xz
passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.tar.zst
passt-db1fe773a3eacbf2b3ce0b3333f1684291fa5c2d.zip
tcp: Avoid SO_ACCEPTCONN getsockopt() by noting listening/data sockets numbers
...the rest is reshuffling existing macros to use the bits we need in TCP code.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- passt.c 51
-rw-r--r-- tcp.c 46
-rw-r--r-- tcp.h 8
-rw-r--r-- util.c 19
-rw-r--r-- util.h 19
5 files changed, 87 insertions, 56 deletions
diff --git a/passt.c b/passt.c
index 2fc88cf..3a56f67 100644
--- a/passt.c
+++ b/passt.c
@@ -613,40 +613,39 @@ static int tap_handler(struct ctx *c)
/**
* sock_handler() - Event handler for L4 sockets
* @c: Execution context
- * @fd: File descriptor associated to event
+ * @s: Socket associated to event
* @events epoll events
*/
-static void sock_handler(struct ctx *c, int fd, uint32_t events)
+static void sock_handler(struct ctx *c, int s, uint32_t events)
{
socklen_t sl;
- int so;
-
- sl = sizeof(so);
-
-#define IN(x, proto) (x >= c->proto.fd_min && x <= c->proto.fd_max)
-
- if (IN(fd, udp) && !IN(fd, icmp) && !IN(fd, tcp))
- so = IPPROTO_UDP;
- else if (IN(fd, tcp) && !IN(fd, icmp) && !IN(fd, udp))
- so = IPPROTO_TCP;
- else if (IN(fd, icmp) && !IN(fd, udp) && !IN(fd, tcp))
- so = IPPROTO_ICMP; /* Fits ICMPv6 below, too */
- else if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &so, &sl)) {
- epoll_ctl(c->epollfd, EPOLL_CTL_DEL, fd, NULL);
- close(fd);
+ int proto;
+
+ sl = sizeof(proto);
+
+ if ( FD_PROTO(s, udp) && !FD_PROTO(s, icmp) && !FD_PROTO(s, tcp))
+ proto = IPPROTO_UDP;
+ else if (FD_PROTO(s, tcp) && !FD_PROTO(s, icmp) && !FD_PROTO(s, udp))
+ proto = IPPROTO_TCP;
+ else if (FD_PROTO(s, icmp) && !FD_PROTO(s, udp) && !FD_PROTO(s, tcp))
+ proto = IPPROTO_ICMP; /* Fits ICMPv6 below, too */
+ else if (getsockopt(s, SOL_SOCKET, SO_PROTOCOL, &proto, &sl))
+ proto = -1;
+
+ if (proto == -1) {
+ epoll_ctl(c->epollfd, EPOLL_CTL_DEL, s, NULL);
+ close(s);
return;
}
-#undef IN
-
- debug("%s: packet from socket %i", getprotobynumber(so)->p_name, fd);
+ debug("%s: packet from socket %i", getprotobynumber(proto)->p_name, s);
- if (so == IPPROTO_ICMP || so == IPPROTO_ICMPV6)
- icmp_sock_handler(c, fd, events);
- else if (so == IPPROTO_TCP)
- tcp_sock_handler(c, fd, events);
- else if (so == IPPROTO_UDP)
- udp_sock_handler(c, fd, events);
+ if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
+ icmp_sock_handler(c, s, events);
+ else if (proto == IPPROTO_TCP)
+ tcp_sock_handler(c, s, events);
+ else if (proto == IPPROTO_UDP)
+ udp_sock_handler(c, s, events);
}
/**
diff --git a/tcp.c b/tcp.c
index 1713dbc..5ba8f3f 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1003,10 +1003,8 @@ static void tcp_conn_from_sock(struct ctx *c, int fd)
if (s == -1)
return;
- if (s < c->tcp.fd_min)
- c->tcp.fd_min = s;
- if (s > c->tcp.fd_max)
- c->tcp.fd_max = s;
+ CHECK_SET_MIN_MAX(c->tcp.fd_, s);
+ CHECK_SET_MIN_MAX(c->tcp.fd_conn_, s);
if (sa_l.ss_family == AF_INET) {
struct sockaddr_in *sa4 = (struct sockaddr_in *)&sa_r;
@@ -1406,7 +1404,7 @@ static void tcp_connect_finish(struct ctx *c, int s)
void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
{
socklen_t sl;
- int so;
+ int accept;
if (tc[s].s == LAST_ACK) {
tcp_send_to_tap(c, s, ACK, NULL, 0);
@@ -1414,21 +1412,28 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
return;
}
+
if (tc[s].s == SOCK_SYN_SENT) {
/* This can only be a socket error or a shutdown from remote */
tcp_rst(c, s);
return;
}
-
- sl = sizeof(so);
- if ((events & EPOLLERR) ||
- getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &so, &sl)) {
+ if (IN_INTERVAL(c->tcp.fd_listen_min, c->tcp.fd_listen_max, s) &&
+ !IN_INTERVAL(c->tcp.fd_conn_min, c->tcp.fd_conn_max, s))
+ accept = 1;
+ else if (IN_INTERVAL(c->tcp.fd_conn_min, c->tcp.fd_conn_max, s) &&
+ !IN_INTERVAL(c->tcp.fd_listen_min, c->tcp.fd_listen_max, s))
+ accept = 0;
+ else if (getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &accept, &sl))
+ accept = -1;
+
+ if ((events & EPOLLERR) || accept == -1) {
if (tc[s].s != CLOSED)
tcp_rst(c, s);
return;
}
- if (so) {
+ if (accept) {
tcp_conn_from_sock(c, s);
return;
}
@@ -1466,15 +1471,24 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
int tcp_sock_init(struct ctx *c)
{
in_port_t port;
+ int s = 0;
- c->tcp.fd_min = INT_MAX;
- c->tcp.fd_max = 0;
+ c->tcp.fd_min = c->tcp.fd_listen_min = c->tcp.fd_conn_min = INT_MAX;
+ c->tcp.fd_max = c->tcp.fd_listen_max = c->tcp.fd_conn_max = 0;
+ CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s);
for (port = 0; port < (1 << 15) + (1 << 14); port++) {
- if (c->v4 && sock_l4_add(c, 4, IPPROTO_TCP, port) < 0)
- return -1;
- if (c->v6 && sock_l4_add(c, 6, IPPROTO_TCP, port) < 0)
- return -1;
+ if (c->v4) {
+ if ((s = sock_l4_add(c, 4, IPPROTO_TCP, port)) < 0)
+ return -1;
+ CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s);
+ }
+
+ if (c->v6) {
+ if ((s = sock_l4_add(c, 6, IPPROTO_TCP, port)) < 0)
+ return -1;
+ CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s);
+ }
}
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
diff --git a/tcp.h b/tcp.h
index 2a79a75..4604281 100644
--- a/tcp.h
+++ b/tcp.h
@@ -14,11 +14,19 @@ void tcp_timer(struct ctx *c, struct timespec *ts);
* @hash_secret: 128-bit secret for hash functions, ISN and hash table
* @fd_min: Lowest file descriptor number for TCP ever used
* @fd_max: Highest file descriptor number for TCP ever used
+ * @fd_listen_min: Lowest file descriptor number for listening sockets
+ * @fd_listen_max: Highest file descriptor number for listening sockets
+ * @fd_conn_min: Lowest file descriptor number for connected sockets
+ * @fd_conn_max: Highest file descriptor number for connected sockets
*/
struct tcp_ctx {
uint64_t hash_secret[2];
int fd_min;
int fd_max;
+ int fd_listen_min;
+ int fd_listen_max;
+ int fd_conn_min;
+ int fd_conn_max;
};
#endif /* TCP_H */
diff --git a/util.c b/util.c
index cc96a1a..acd62a9 100644
--- a/util.c
+++ b/util.c
@@ -25,6 +25,7 @@
#include <stdarg.h>
#include "passt.h"
+#include "util.h"
#define logfn(name, level) \
void name(const char *format, ...) { \
@@ -189,20 +190,10 @@ int sock_l4_add(struct ctx *c, int v, uint16_t proto, uint16_t port)
return -1;
}
-#define CHECK_SET_MIN_MAX(ipproto, proto_ctx, fd) \
- if (proto == (ipproto)) { \
- if (fd < c->proto_ctx.fd_min) \
- c->proto_ctx.fd_min = (fd); \
- if (fd > c->proto_ctx.fd_max) \
- c->proto_ctx.fd_max = (fd); \
- }
-
- CHECK_SET_MIN_MAX(IPPROTO_ICMP, icmp, fd);
- CHECK_SET_MIN_MAX(IPPROTO_ICMPV6, icmp, fd);
- CHECK_SET_MIN_MAX(IPPROTO_TCP, tcp, fd);
- CHECK_SET_MIN_MAX(IPPROTO_UDP, udp, fd);
-
-#undef CHECK_SET_MIN_MAX
+ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_ICMP, icmp, fd);
+ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_ICMPV6, icmp, fd);
+ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_TCP, tcp, fd);
+ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_UDP, udp, fd);
if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
goto epoll_add;
diff --git a/util.h b/util.h
index 139638c..34b8266 100644
--- a/util.h
+++ b/util.h
@@ -8,6 +8,25 @@ void debug(const char *format, ...);
#define debug(...) { }
#endif
+#define CHECK_SET_MIN_MAX(basename, fd) \
+ do { \
+ if ((fd) < basename##min) \
+ basename##min = (fd); \
+ if ((fd) > basename##max) \
+ basename##max = (fd); \
+ } while (0)
+
+#define CHECK_SET_MIN_MAX_PROTO_FD(proto, ipproto, proto_ctx, fd) \
+ do { \
+ if ((proto) == (ipproto)) \
+ CHECK_SET_MIN_MAX(c->proto_ctx.fd_, (fd)); \
+ } while (0)
+
+#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
+
+#define FD_PROTO(x, proto) \
+ (IN_INTERVAL(c->proto.fd_min, c->proto.fd_max, (x)))
+
uint16_t csum_fold(uint32_t sum);
uint16_t csum_ip4(void *buf, size_t len);
void csum_tcp4(struct iphdr *iph);