From 1a834879a2f7ab138c12cd65c610f71eece8a939 Mon Sep 17 00:00:00 2001 From: Yumei Huang Date: Tue, 2 Dec 2025 11:00:07 +0800 Subject: tcp: Clamp the retry timeout Clamp the TCP retry timeout as the Linux kernel does. If a retry occurs during the handshake and the RTO is below 3 seconds, re-initialise it to 3 seconds for data retransmissions according to RFC 6298. Suggested-by: Stefano Brivio Signed-off-by: Yumei Huang Reviewed-by: David Gibson Signed-off-by: Stefano Brivio --- tcp.c | 25 ++++++++++++++++++++----- tcp.h | 2 ++ tcp_conn.h | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/tcp.c b/tcp.c index cff23d1..3ba2e03 100644 --- a/tcp.c +++ b/tcp.c @@ -187,6 +187,9 @@ * established connections, or (syn_retries + syn_linear_timeouts) times * during the handshake, then reset the connection * + * - RTO_INIT_AFTER_SYN_RETRIES: if SYN retries happened during handshake and + * RTO is less than this, re-initialise RTO to this for data retransmissions + * * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE * with TAP_FIN_SENT event), and no ACK is received within this time, reset * the connection @@ -340,6 +343,7 @@ enum { #define ACK_INTERVAL 10 /* ms */ #define RTO_INIT 1 /* s, RFC 6298 */ +#define RTO_INIT_AFTER_SYN_RETRIES 3 /* s, RFC 6298 */ #define FIN_TIMEOUT 60 #define ACT_TIMEOUT 7200 @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue [TCP_MIGRATE_RCV_QUEUE_MAX]; #define SYN_RETRIES "/proc/sys/net/ipv4/tcp_syn_retries" #define SYN_LINEAR_TIMEOUTS "/proc/sys/net/ipv4/tcp_syn_linear_timeouts" +#define RTO_MAX_MS "/proc/sys/net/ipv4/tcp_rto_max_ms" #define SYN_RETRIES_DEFAULT 6 #define SYN_LINEAR_TIMEOUTS_DEFAULT 4 +#define RTO_MAX_DEFAULT 120 /* s */ #define MAX_SYNCNT 127 /* derived from kernel's limit */ /* "Extended" data (not stored in the flow table) for TCP flow migration */ @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = { static const char *tcp_flag_str[] 
__attribute((__unused__)) = { "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE", - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED", }; /* Listening sockets, used for automatic port forwarding in pasta mode only */ @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) if (conn->flags & ACK_TO_TAP_DUE) { it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000; } else if (conn->flags & ACK_FROM_TAP_DUE) { - int exp = conn->retries; + int exp = conn->retries, timeout = RTO_INIT; if (!(conn->events & ESTABLISHED)) exp -= c->tcp.syn_linear_timeouts; - it.it_value.tv_sec = RTO_INIT << MAX(exp, 0); + else if (conn->flags & SYN_RETRIED) + timeout = MAX(timeout, RTO_INIT_AFTER_SYN_RETRIES); + timeout <<= MAX(exp, 0); + it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max); } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { it.it_value.tv_sec = FIN_TIMEOUT; } else { @@ -2441,6 +2450,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) flow_trace(conn, "SYN timeout, retry"); tcp_send_flag(c, conn, SYN); conn->retries++; + conn_flag(c, conn, SYN_RETRIED); tcp_timer_ctl(c, conn); } } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { @@ -2812,10 +2822,15 @@ static void tcp_get_rto_params(struct ctx *c) v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT); c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT); + v = read_file_integer(RTO_MAX_MS, (intmax_t)(RTO_MAX_DEFAULT * 1000)); + c->tcp.rto_max = MIN(DIV_ROUND_UP(v, 1000), INT_MAX); + debug("Using TCP RTO parameters, syn_retries: %"PRIu8 - ", syn_linear_timeouts: %"PRIu8, + ", syn_linear_timeouts: %"PRIu8 + ", rto_max: %d", c->tcp.syn_retries, - c->tcp.syn_linear_timeouts); + c->tcp.syn_linear_timeouts, + c->tcp.rto_max); } /** diff --git a/tcp.h b/tcp.h index 37d7758..6fb6f92 100644 --- a/tcp.h +++ b/tcp.h @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref { * @fwd_out: Port forwarding 
configuration for outbound packets * @timer_run: Timestamp of most recent timer run * @pipe_size: Size of pipes for spliced connections + * @rto_max: Maximum retry timeout (in s) * @syn_retries: SYN retries using exponential backoff timeout * @syn_linear_timeouts: SYN retries before using exponential backoff timeout */ @@ -68,6 +69,7 @@ struct tcp_ctx { struct fwd_ports fwd_out; struct timespec timer_run; size_t pipe_size; + int rto_max; uint8_t syn_retries; uint8_t syn_linear_timeouts; }; diff --git a/tcp_conn.h b/tcp_conn.h index 923af36..e36910c 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -77,6 +77,7 @@ struct tcp_tap_conn { #define ACK_TO_TAP_DUE BIT(3) #define ACK_FROM_TAP_DUE BIT(4) #define ACK_FROM_TAP_BLOCKS BIT(5) +#define SYN_RETRIED BIT(6) #define SNDBUF_BITS 24 unsigned int sndbuf :SNDBUF_BITS; -- cgit v1.2.3