aboutgitcodebugslistschat
path: root/tcp.c
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2023-02-12 22:26:55 +0100
committerStefano Brivio <sbrivio@redhat.com>2023-02-12 22:26:55 +0100
commitcc6d8286d1043d04eb8518e39cebcb9e086dca17 (patch)
tree68c85479bf3d711f951aeefb53c5c811b502090e /tcp.c
parentac153595c0427e994bad730ae760b4e0742ec30b (diff)
downloadpasst-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar
passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.gz
passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.bz2
passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.lz
passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.xz
passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.tar.zst
passt-cc6d8286d1043d04eb8518e39cebcb9e086dca17.zip
tcp: Reset ACK_FROM_TAP_DUE flag only as needed, update timer
David reports that TCP transfers might stall, especially with smaller socket buffer sizes, because we reset the ACK_FROM_TAP_DUE flag, in tcp_tap_handler(), whenever we receive an ACK segment, regardless of its sequence number and the fact that we might still be waiting for one. This way, we might fail to re-transmit frames on ACK timeouts. We need, instead, to: - indicate with the @retrans field only re-transmissions for the same data sequences. If we make progress, it should be reset, given that it's used to abort a connection when we exceed a given number of re-transmissions for the same data - unset the ACK_FROM_TAP_DUE flag if and only if the acknowledged sequence is the same as the last one we sent, as suggested by David - keep it set otherwise, if progress was done but not all the data we sent was acknowledged, and update the expiration of the ACK timeout Add a new helper for these purposes, tcp_update_seqack_from_tap(). To extend the ACK timeout, the new helper sets the ACK_FROM_TAP_DUE flag, even if it was already set, and conn_flag_do() triggers a timer update. This part should be revisited at a later time, because, strictly speaking, ACK_FROM_TAP_DUE isn't a flag anymore. One possibility might be to introduce another connection attribute for events affecting timer deadlines. Reported-by: David Gibson <david@gibson.dropbear.id.au> Link: https://bugs.passt.top/show_bug.cgi?id=41 Suggested-by: David Gibson <david@gibson.dropbear.id.au> Fixes: be5bbb9b0681 ("tcp: Rework timers to use timerfd instead of periodic bitmap scan") Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'tcp.c')
-rw-r--r--tcp.c52
1 files changed, 38 insertions, 14 deletions
diff --git a/tcp.c b/tcp.c
index 1e0a338..dfa8b6d 100644
--- a/tcp.c
+++ b/tcp.c
@@ -757,8 +757,18 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
tcp_flag_str[fls(~flag)]);
}
} else {
- if (conn->flags & flag)
+ if (conn->flags & flag) {
+ /* Special case: setting ACK_FROM_TAP_DUE on a
+ * connection where it's already set is used to
+ * re-schedule the existing timer.
+ * TODO: define clearer semantics for timer-related
+ * flags and factor this into the logic below.
+ */
+ if (flag == ACK_FROM_TAP_DUE)
+ tcp_timer_ctl(c, conn);
+
return;
+ }
conn->flags |= flag;
if (fls(flag) >= 0) {
@@ -1592,6 +1602,26 @@ out:
}
/**
+ * tcp_update_seqack_from_tap() - ACK number from tap and related flags/counters
+ * @c: Execution context
+ * @conn: Connection pointer
+ * @seq Current ACK sequence, host order
+ */
+static void tcp_update_seqack_from_tap(const struct ctx *c,
+ struct tcp_tap_conn *conn, uint32_t seq)
+{
+ if (SEQ_GT(seq, conn->seq_ack_from_tap)) {
+ if (seq == conn->seq_to_tap)
+ conn_flag(c, conn, ~ACK_FROM_TAP_DUE);
+ else
+ conn_flag(c, conn, ACK_FROM_TAP_DUE);
+
+ conn->retrans = 0;
+ conn->seq_ack_from_tap = seq;
+ }
+}
+
+/**
* tcp_send_flag() - Send segment with flags to tap (no payload)
* @c: Execution context
* @conn: Connection pointer
@@ -2041,7 +2071,6 @@ static int tcp_sock_consume(struct tcp_tap_conn *conn, uint32_t ack_seq)
MSG_DONTWAIT | MSG_TRUNC) < 0)
return -errno;
- conn->seq_ack_from_tap = ack_seq;
return 0;
}
@@ -2333,14 +2362,9 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
tcp_clamp_window(c, conn, max_ack_seq_wnd);
- if (ack) {
- if (max_ack_seq == conn->seq_to_tap) {
- conn_flag(c, conn, ~ACK_FROM_TAP_DUE);
- conn->retrans = 0;
- }
-
- tcp_sock_consume(conn, max_ack_seq);
- }
+ /* On socket flush failure, pretend there was no ACK, try again later */
+ if (ack && !tcp_sock_consume(conn, max_ack_seq))
+ tcp_update_seqack_from_tap(c, conn, max_ack_seq);
if (retr) {
trace("TCP: fast re-transmit, ACK: %u, previous sequence: %u",
@@ -2492,10 +2516,8 @@ int tcp_tap_handler(struct ctx *c, int af, const void *addr,
return p->count;
}
- if (th->ack) {
- conn_flag(c, conn, ~ACK_FROM_TAP_DUE);
- conn->retrans = 0;
- }
+ if (th->ack && !(conn->events & ESTABLISHED))
+ tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq));
conn_flag(c, conn, ~STALLED);
@@ -2543,6 +2565,8 @@ int tcp_tap_handler(struct ctx *c, int af, const void *addr,
/* Established connections not accepting data from tap */
if (conn->events & TAP_FIN_RCVD) {
+ tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq));
+
if (conn->events & SOCK_FIN_RCVD &&
conn->seq_ack_from_tap == conn->seq_to_tap)
conn_event(c, conn, CLOSED);