From be5bbb9b06811b98f677460fd2b89001db580582 Mon Sep 17 00:00:00 2001
From: Stefano Brivio
Date: Fri, 18 Mar 2022 12:18:19 +0100
Subject: tcp: Rework timers to use timerfd instead of periodic bitmap scan

With a lot of concurrent connections, the bitmap scan approach is not
really sustainable.

Switch to per-connection timerfd timers, set based on events and on
two new flags, ACK_FROM_TAP_DUE and ACK_TO_TAP_DUE. Timers are added
to the common epoll list, and implement the existing timeouts.

While at it, drop the CONN_ prefix from flag names, otherwise they
get quite long, and fix the logic to decide if a connection has a
local, possibly unreachable endpoint: we shouldn't go through the
rest of tcp_conn_from_tap() if we reset the connection due to a
successful bind(2), and we'll get EACCES if the port number is low.

Suggested by: Stefan Hajnoczi
Signed-off-by: Stefano Brivio
---
 tcp.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'tcp.h')

diff --git a/tcp.h b/tcp.h
index b4e3fde..3154b4b 100644
--- a/tcp.h
+++ b/tcp.h
@@ -6,7 +6,9 @@
 #ifndef TCP_H
 #define TCP_H

-#define TCP_TIMER_INTERVAL 20 /* ms */
+#define REFILL_INTERVAL 1000 /* ms */
+#define PORT_DETECT_INTERVAL 1000
+#define TCP_TIMER_INTERVAL MIN(REFILL_INTERVAL, PORT_DETECT_INTERVAL)

 #define TCP_MAX_CONNS (128 * 1024)
 #define TCP_MAX_SOCKS (TCP_MAX_CONNS + USHRT_MAX * 2)
@@ -21,7 +23,7 @@ int tcp_tap_handler(struct ctx *c, int af, void *addr,
 struct tap_l4_msg *msg, int count, struct timespec *now);
 int tcp_sock_init(struct ctx *c, struct timespec *now);
 void tcp_timer(struct ctx *c, struct timespec *now);
-void tcp_defer_handler(struct ctx *c, struct timespec *now);
+void tcp_defer_handler(struct ctx *c);

 void tcp_sock_set_bufsize(struct ctx *c, int s);
 void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
@@ -34,6 +36,7 @@ void tcp_remap_to_init(in_port_t port, in_port_t delta);
 * @listen: Set if this file descriptor is a listening socket
 * @splice: Set if descriptor is associated to a spliced connection
 * @v6: Set for IPv6 sockets or connections
+ * @timer: Reference is a timerfd descriptor for connection
 * @index: Index of connection in table, or port for bound sockets
 * @u32: Opaque u32 value of reference
 */
@@ -42,6 +45,7 @@ union tcp_epoll_ref {
 uint32_t listen:1,
 splice:1,
 v6:1,
+ timer:1,
 index:20;
 } tcp;
 uint32_t u32;
--
cgit v1.2.3