diff options
| author | Yumei Huang <yuhuang@redhat.com> | 2025-12-02 11:00:07 +0800 |
|---|---|---|
| committer | Stefano Brivio <sbrivio@redhat.com> | 2025-12-02 23:05:08 +0100 |
| commit | 1a834879a2f7ab138c12cd65c610f71eece8a939 (patch) | |
| tree | 195c9a5db61ddc607dc30fdb2a9e11c56d935759 | |
| parent | 3dde0e07804e35f98765bec5bf7b06f46a9948ca (diff) | |
| download | passt-1a834879a2f7ab138c12cd65c610f71eece8a939.tar passt-1a834879a2f7ab138c12cd65c610f71eece8a939.tar.gz passt-1a834879a2f7ab138c12cd65c610f71eece8a939.tar.bz2 passt-1a834879a2f7ab138c12cd65c610f71eece8a939.tar.lz passt-1a834879a2f7ab138c12cd65c610f71eece8a939.tar.xz passt-1a834879a2f7ab138c12cd65c610f71eece8a939.tar.zst passt-1a834879a2f7ab138c12cd65c610f71eece8a939.zip | |
tcp: Clamp the retry timeout
Clamp the TCP retry timeout as the Linux kernel does. If a retry occurs
during the handshake and the RTO is below 3 seconds, re-initialise
it to 3 seconds for data retransmissions according to RFC 6298.
Suggested-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Yumei Huang <yuhuang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
| -rw-r--r-- | tcp.c | 25 | ||||
| -rw-r--r-- | tcp.h | 2 | ||||
| -rw-r--r-- | tcp_conn.h | 1 |
3 files changed, 23 insertions, 5 deletions
@@ -187,6 +187,9 @@ * established connections, or (syn_retries + syn_linear_timeouts) times * during the handshake, then reset the connection * + * - RTO_INIT_AFTER_SYN_RETRIES: if SYN retries happened during handshake and + * RTO is less than this, re-initialise RTO to this for data retransmissions + * * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE * with TAP_FIN_SENT event), and no ACK is received within this time, reset * the connection @@ -340,6 +343,7 @@ enum { #define ACK_INTERVAL 10 /* ms */ #define RTO_INIT 1 /* s, RFC 6298 */ +#define RTO_INIT_AFTER_SYN_RETRIES 3 /* s, RFC 6298 */ #define FIN_TIMEOUT 60 #define ACT_TIMEOUT 7200 @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue [TCP_MIGRATE_RCV_QUEUE_MAX]; #define SYN_RETRIES "/proc/sys/net/ipv4/tcp_syn_retries" #define SYN_LINEAR_TIMEOUTS "/proc/sys/net/ipv4/tcp_syn_linear_timeouts" +#define RTO_MAX_MS "/proc/sys/net/ipv4/tcp_rto_max_ms" #define SYN_RETRIES_DEFAULT 6 #define SYN_LINEAR_TIMEOUTS_DEFAULT 4 +#define RTO_MAX_DEFAULT 120 /* s */ #define MAX_SYNCNT 127 /* derived from kernel's limit */ /* "Extended" data (not stored in the flow table) for TCP flow migration */ @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = { static const char *tcp_flag_str[] __attribute((__unused__)) = { "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE", - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED", }; /* Listening sockets, used for automatic port forwarding in pasta mode only */ @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) if (conn->flags & ACK_TO_TAP_DUE) { it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000; } else if (conn->flags & ACK_FROM_TAP_DUE) { - int exp = conn->retries; + int exp = conn->retries, timeout = RTO_INIT; if (!(conn->events & ESTABLISHED)) exp -= c->tcp.syn_linear_timeouts; - it.it_value.tv_sec = RTO_INIT << MAX(exp, 0); + 
else if (conn->flags & SYN_RETRIED) + timeout = MAX(timeout, RTO_INIT_AFTER_SYN_RETRIES); + timeout <<= MAX(exp, 0); + it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max); } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { it.it_value.tv_sec = FIN_TIMEOUT; } else { @@ -2441,6 +2450,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) flow_trace(conn, "SYN timeout, retry"); tcp_send_flag(c, conn, SYN); conn->retries++; + conn_flag(c, conn, SYN_RETRIED); tcp_timer_ctl(c, conn); } } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { @@ -2812,10 +2822,15 @@ static void tcp_get_rto_params(struct ctx *c) v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT); c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT); + v = read_file_integer(RTO_MAX_MS, (intmax_t)(RTO_MAX_DEFAULT * 1000)); + c->tcp.rto_max = MIN(DIV_ROUND_UP(v, 1000), INT_MAX); + debug("Using TCP RTO parameters, syn_retries: %"PRIu8 - ", syn_linear_timeouts: %"PRIu8, + ", syn_linear_timeouts: %"PRIu8 + ", rto_max: %d", c->tcp.syn_retries, - c->tcp.syn_linear_timeouts); + c->tcp.syn_linear_timeouts, + c->tcp.rto_max); } /** @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref { * @fwd_out: Port forwarding configuration for outbound packets * @timer_run: Timestamp of most recent timer run * @pipe_size: Size of pipes for spliced connections + * @rto_max: Maximum retry timeout (in s) * @syn_retries: SYN retries using exponential backoff timeout * @syn_linear_timeouts: SYN retries before using exponential backoff timeout */ @@ -68,6 +69,7 @@ struct tcp_ctx { struct fwd_ports fwd_out; struct timespec timer_run; size_t pipe_size; + int rto_max; uint8_t syn_retries; uint8_t syn_linear_timeouts; }; @@ -77,6 +77,7 @@ struct tcp_tap_conn { #define ACK_TO_TAP_DUE BIT(3) #define ACK_FROM_TAP_DUE BIT(4) #define ACK_FROM_TAP_BLOCKS BIT(5) +#define SYN_RETRIED BIT(6) #define SNDBUF_BITS 24 unsigned int sndbuf :SNDBUF_BITS; |
