From 1ee2f7cada9e6f739a00d39bb9821f1ce3493d92 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 21 Mar 2023 23:14:58 +0100 Subject: tcp: Don't reset ACK_TO_TAP_DUE on any ACK, reschedule timer as needed This is mostly symmetric with commit cc6d8286d104 ("tcp: Reset ACK_FROM_TAP_DUE flag only as needed, update timer"): we shouldn't reset the ACK_TO_TAP_DUE flag on any inbound ACK segment, but only once we acknowledge everything we received from the guest or the container. If we don't, a client might unnecessarily hold off further data, especially during slow start, and in general we won't converge to the usable bandwidth. This is very visible especially with traffic tests on links with non-negligible latency, such as in the reported issue. There, a public iperf3 server sometimes aborts the test due do what appears to be a low iperf3's --rcv-timeout (probably less than a second). Even if this doesn't happen, the throughput will converge to a fraction of the usable bandwidth. Clear ACK_TO_TAP_DUE if we acknowledged everything, set it if we didn't, and reschedule the timer in case the flag is still set as the timer expires. While at it, decrease the ACK timer interval to 10ms. A 50ms interval is short enough for any bandwidth-delay product I had in mind (local connections, or non-local connections with limited bandwidth), but here I am, testing 1gbps transfers to a peer with 100ms RTT. Indeed, we could eventually make the timer interval dependent on the current window and estimated bandwidth-delay product, but at least for the moment being, 10ms should be long enough to avoid any measurable syscall overhead, yet usable for any real-world application. Reported-by: Lukas Mrtvy Link: https://bugs.passt.top/show_bug.cgi?id=44 Signed-off-by: Stefano Brivio --- tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tcp.c b/tcp.c index 590b08a..f156287 100644 --- a/tcp.c +++ b/tcp.c @@ -364,7 +364,7 @@ struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ # define KERNEL_REPORTS_SND_WND(c) (0 && (c)) #endif -#define ACK_INTERVAL 50 /* ms */ +#define ACK_INTERVAL 10 /* ms */ #define SYN_TIMEOUT 10 /* s */ #define ACK_TIMEOUT 2 #define FIN_TIMEOUT 60 @@ -1730,8 +1730,12 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen, NULL, conn->seq_to_tap); - if (th->ack) - conn_flag(c, conn, ~ACK_TO_TAP_DUE); + if (th->ack) { + if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap)) + conn_flag(c, conn, ~ACK_TO_TAP_DUE); + else + conn_flag(c, conn, ACK_TO_TAP_DUE); + } if (th->fin) conn_flag(c, conn, ACK_FROM_TAP_DUE); @@ -2820,7 +2824,7 @@ static void tcp_timer_handler(struct ctx *c, union epoll_ref ref) if (conn->flags & ACK_TO_TAP_DUE) { tcp_send_flag(c, conn, ACK_IF_NEEDED); - conn_flag(c, conn, ~ACK_TO_TAP_DUE); + tcp_timer_ctl(c, conn); } else if (conn->flags & ACK_FROM_TAP_DUE) { if (!(conn->events & ESTABLISHED)) { debug("TCP: index %li, handshake timeout", CONN_IDX(conn)); -- cgit v1.2.3