diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2023-02-12 22:26:55 +0100 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2023-02-12 22:26:55 +0100 |
commit | cc6d8286d1043d04eb8518e39cebcb9e086dca17 (patch) | |
tree | 68c85479bf3d711f951aeefb53c5c811b502090e | |
parent | ac153595c0427e994bad730ae760b4e0742ec30b (diff) | |
download | passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.gz passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.bz2 passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.lz passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.xz passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.zst passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.zip |
tcp: Reset ACK_FROM_TAP_DUE flag only as needed, update timer
David reports that TCP transfers might stall, especially with smaller
socket buffer sizes, because we reset the ACK_FROM_TAP_DUE flag, in
tcp_tap_handler(), whenever we receive an ACK segment, regardless of
its sequence number and the fact that we might still be waiting for
one. This way, we might fail to re-transmit frames on ACK timeouts.
We need, instead, to:
- indicate with the @retrans field only re-transmissions for the same
data sequences. If we make progress, it should be reset, given that
it's used to abort a connection when we exceed a given number of
re-transmissions for the same data
- unset the ACK_FROM_TAP_DUE flag if and only if the acknowledged
sequence is the same as the last one we sent, as suggested by David
- keep it set otherwise, if progress was made but not all the data we
sent was acknowledged, and update the expiration of the ACK timeout
Add a new helper for these purposes, tcp_update_seqack_from_tap().
To extend the ACK timeout, the new helper sets the ACK_FROM_TAP_DUE
flag, even if it was already set, and conn_flag_do() triggers a timer
update. This part should be revisited at a later time, because,
strictly speaking, ACK_FROM_TAP_DUE isn't a flag anymore. One
possibility might be to introduce another connection attribute for
events affecting timer deadlines.
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Link: https://bugs.passt.top/show_bug.cgi?id=41
Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Fixes: be5bbb9b0681 ("tcp: Rework timers to use timerfd instead of periodic bitmap scan")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- | tcp.c | 52 |
1 files changed, 38 insertions, 14 deletions
@@ -757,8 +757,18 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, tcp_flag_str[fls(~flag)]); } } else { - if (conn->flags & flag) + if (conn->flags & flag) { + /* Special case: setting ACK_FROM_TAP_DUE on a + * connection where it's already set is used to + * re-schedule the existing timer. + * TODO: define clearer semantics for timer-related + * flags and factor this into the logic below. + */ + if (flag == ACK_FROM_TAP_DUE) + tcp_timer_ctl(c, conn); + return; + } conn->flags |= flag; if (fls(flag) >= 0) { @@ -1592,6 +1602,26 @@ out: } /** + * tcp_update_seqack_from_tap() - ACK number from tap and related flags/counters + * @c: Execution context + * @conn: Connection pointer + * @seq Current ACK sequence, host order + */ +static void tcp_update_seqack_from_tap(const struct ctx *c, + struct tcp_tap_conn *conn, uint32_t seq) +{ + if (SEQ_GT(seq, conn->seq_ack_from_tap)) { + if (seq == conn->seq_to_tap) + conn_flag(c, conn, ~ACK_FROM_TAP_DUE); + else + conn_flag(c, conn, ACK_FROM_TAP_DUE); + + conn->retrans = 0; + conn->seq_ack_from_tap = seq; + } +} + +/** * tcp_send_flag() - Send segment with flags to tap (no payload) * @c: Execution context * @conn: Connection pointer @@ -2041,7 +2071,6 @@ static int tcp_sock_consume(struct tcp_tap_conn *conn, uint32_t ack_seq) MSG_DONTWAIT | MSG_TRUNC) < 0) return -errno; - conn->seq_ack_from_tap = ack_seq; return 0; } @@ -2333,14 +2362,9 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn, tcp_clamp_window(c, conn, max_ack_seq_wnd); - if (ack) { - if (max_ack_seq == conn->seq_to_tap) { - conn_flag(c, conn, ~ACK_FROM_TAP_DUE); - conn->retrans = 0; - } - - tcp_sock_consume(conn, max_ack_seq); - } + /* On socket flush failure, pretend there was no ACK, try again later */ + if (ack && !tcp_sock_consume(conn, max_ack_seq)) + tcp_update_seqack_from_tap(c, conn, max_ack_seq); if (retr) { trace("TCP: fast re-transmit, ACK: %u, previous sequence: %u", @@ -2492,10 +2516,8 @@ int 
tcp_tap_handler(struct ctx *c, int af, const void *addr, return p->count; } - if (th->ack) { - conn_flag(c, conn, ~ACK_FROM_TAP_DUE); - conn->retrans = 0; - } + if (th->ack && !(conn->events & ESTABLISHED)) + tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq)); conn_flag(c, conn, ~STALLED); @@ -2543,6 +2565,8 @@ int tcp_tap_handler(struct ctx *c, int af, const void *addr, /* Established connections not accepting data from tap */ if (conn->events & TAP_FIN_RCVD) { + tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq)); + if (conn->events & SOCK_FIN_RCVD && conn->seq_ack_from_tap == conn->seq_to_tap) conn_event(c, conn, CLOSED); |