aboutgitcodebugslistschat
path: root/tcp.c
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2021-08-04 01:35:45 +0200
committerStefano Brivio <sbrivio@redhat.com>2021-08-04 01:35:45 +0200
commit539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6 (patch)
tree67dcb6c24076593c27b39a2df428e3678fe0c46e /tcp.c
parent0017bc3c3e3f666bb190f2c1f808e447e96a10aa (diff)
downloadpasst-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.tar
passt-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.tar.gz
passt-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.tar.bz2
passt-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.tar.lz
passt-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.tar.xz
passt-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.tar.zst
passt-539dcf5add9c8a3cd22ca8af89069be1bbbcf3f6.zip
tcp: Fast re-transmit, more fixes for closing states and no_snd_wnd
...and while at it, fix an issue in the calculation of the last IOV buffer size: if we can't receive enough data to fill up the window, the last buffer can be filled completely. Also streamline the code setting iovec lengths if cached values are not matching. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'tcp.c')
-rw-r--r--tcp.c118
1 files changed, 73 insertions, 45 deletions
diff --git a/tcp.c b/tcp.c
index 5f9d5f6..977e745 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1545,12 +1545,16 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn,
already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;
if (!conn->tap_window || already_sent >= conn->tap_window)
- return 1;
+ return 0;
fill_bufs = DIV_ROUND_UP(conn->tap_window - already_sent,
conn->mss_guest);
- if (fill_bufs > TCP_TAP_FRAMES)
+ if (fill_bufs > TCP_TAP_FRAMES) {
fill_bufs = TCP_TAP_FRAMES;
+ iov_rem = 0;
+ } else {
+ iov_rem = (conn->tap_window - already_sent) % conn->mss_guest;
+ }
/* Adjust iovec length for recvmsg() based on what was set last time. */
if (v4) {
@@ -1562,21 +1566,16 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn,
buf_mss = &tcp6_l2_buf_mss;
buf_mss_nr_set = &tcp6_l2_buf_mss_nr_set;
}
- if (*buf_mss != conn->mss_guest) {
- for (i = 0; i < fill_bufs; i++)
- iov[i].iov_len = conn->mss_guest;
- *buf_mss = conn->mss_guest;
- *buf_mss_nr_set = fill_bufs - 1;
- } else if (*buf_mss_nr_set < fill_bufs) {
- for (i = *buf_mss_nr_set; i < fill_bufs; i++)
- iov[i].iov_len = conn->mss_guest;
- *buf_mss_nr_set = fill_bufs - 1;
- }
+ if (*buf_mss != conn->mss_guest)
+ *buf_mss_nr_set = 0;
+ for (i = *buf_mss_nr_set; i < fill_bufs; i++)
+ iov[i].iov_len = conn->mss_guest;
+ *buf_mss = conn->mss_guest;
+ *buf_mss_nr_set = fill_bufs - 1;
/* First buffer is to discard data, last one may be partially filled. */
iov[-1].iov_len = already_sent;
- iov_rem = (conn->tap_window - already_sent) % conn->mss_guest;
- if (iov_rem && fill_bufs < TCP_TAP_FRAMES)
+ if (iov_rem)
iov[fill_bufs - 1].iov_len = iov_rem;
if (v4)
tcp4_l2_mh_sock.msg_iovlen = fill_bufs + 1;
@@ -1616,27 +1615,28 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn,
buf_mss_tap = &tcp6_l2_buf_mss_tap;
buf_mss_tap_nr_set = &tcp6_l2_buf_mss_tap_nr_set;
}
- if (*buf_mss_tap != mss_tap) {
- for (i = 0; i < send_bufs; i++)
- iov_tap[i].iov_len = mss_tap;
- *buf_mss_tap = mss_tap;
- *buf_mss_tap_nr_set = send_bufs;
- } else if (*buf_mss_tap_nr_set < send_bufs) {
- for (i = *buf_mss_tap_nr_set; i < send_bufs; i++)
- iov_tap[i].iov_len = mss_tap;
- *buf_mss_tap_nr_set = send_bufs;
- }
+ if (*buf_mss_tap != mss_tap)
+ *buf_mss_tap_nr_set = 0;
+ for (i = *buf_mss_tap_nr_set; i < send_bufs; i++)
+ iov_tap[i].iov_len = mss_tap;
+ *buf_mss_tap = mss_tap;
+ *buf_mss_tap_nr_set = send_bufs;
iov_tap[send_bufs - 1].iov_len = mss_tap - conn->mss_guest + last_len;
/* Likely, some new data was acked too. */
if (conn->seq_from_tap != conn->seq_ack_to_tap) {
- if (getsockopt(conn->sock, SOL_TCP, TCP_INFO, &info, &sl))
- goto err;
+ if (conn->no_snd_wnd) {
+ conn->seq_ack_to_tap = conn->seq_from_tap;
+ } else {
+ if (getsockopt(conn->sock, SOL_TCP, TCP_INFO, &info,
+ &sl))
+ goto err;
- conn->tcpi_acked_last = info.tcpi_bytes_acked;
- conn->seq_ack_to_tap = info.tcpi_bytes_acked +
- conn->seq_init_from_tap;
+ conn->tcpi_acked_last = info.tcpi_bytes_acked;
+ conn->seq_ack_to_tap = info.tcpi_bytes_acked +
+ conn->seq_init_from_tap;
+ }
} else {
info.tcpi_snd_wscale = conn->ws;
info.tcpi_snd_wnd = conn->tcpi_snd_wnd;
@@ -1776,11 +1776,14 @@ err:
goto out_restore_iov;
zero_len:
- if (conn->state >= ESTABLISHED_SOCK_FIN)
- goto out_restore_iov;
+ if (conn->state == FIN_WAIT_1) {
+ tcp_tap_state(conn, FIN_WAIT_1_SOCK_FIN);
+ } else if (conn->state < ESTABLISHED_SOCK_FIN) {
+ tcp_tap_state(conn, ESTABLISHED_SOCK_FIN);
+ shutdown(conn->sock, SHUT_RD);
+ tcp_send_to_tap(c, conn, FIN | ACK, NULL, 0);
+ }
- tcp_tap_state(conn, ESTABLISHED_SOCK_FIN);
- tcp_send_to_tap(c, conn, FIN | ACK, NULL, 0);
goto out_restore_iov;
out:
@@ -1807,10 +1810,11 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
struct tap_msg *msg, int count,
struct timespec *now)
{
+ int i, iov_i, keep = -1, ack = 0, fin = 0, retr = 0;
struct msghdr mh = { .msg_iov = tcp_tap_iov };
uint32_t max_ack_seq = conn->seq_ack_from_tap;
uint32_t seq_from_tap = conn->seq_from_tap;
- int i, iov_i, keep = -1, ack = 0, fin = 0;
+ uint16_t max_ack_seq_wnd;
ssize_t len;
for (i = 0, iov_i = 0; i < count; i++) {
@@ -1840,17 +1844,33 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
seq = ntohl(th->seq);
ack_seq = ntohl(th->ack_seq);
+ if (!i) {
+ if (count == 1)
+ max_ack_seq_wnd = ntohs(th->window);
+ else
+ max_ack_seq_wnd = ntohs(th->window) - 1;
+ }
if (th->ack) {
ack = 1;
if (ack_seq - conn->seq_ack_from_tap < MAX_WINDOW &&
- ack_seq - max_ack_seq < MAX_WINDOW)
+ ack_seq - max_ack_seq < MAX_WINDOW) {
+
+ /* Fast re-transmit */
+ retr = !len && ack_seq == max_ack_seq &&
+ max_ack_seq_wnd == ntohs(th->window);
+
+ max_ack_seq_wnd = ntohs(th->window);
max_ack_seq = ack_seq;
+ }
}
if (th->fin)
fin = 1;
+ if (!len)
+ continue;
+
seq_offset = seq_from_tap - seq;
/* Use data from this buffer only in these two cases:
*
@@ -1858,7 +1878,6 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
* |--------| <-- len |--------| <-- len
* '----' <-- offset ' <-- offset
* ^ seq ^ seq
- *
* (offset >= 0, seq + len > seq_from_tap)
*
* discard in these two cases:
@@ -1900,6 +1919,11 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
tcp_sock_consume(conn, max_ack_seq);
}
+ if (retr) {
+ conn->seq_to_tap = max_ack_seq;
+ tcp_data_from_sock(c, conn, now);
+ }
+
if (!iov_i) {
if (keep != -1) {
tcp_send_to_tap(c, conn, ACK, NULL, 0);
@@ -1926,7 +1950,8 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
}
conn->seq_from_tap += len;
- tcp_send_to_tap(c, conn, 0, NULL, 0);
+ if (!fin)
+ tcp_send_to_tap(c, conn, 0, NULL, 0);
fin:
if (conn->state == ESTABLISHED_SOCK_FIN && ack &&
@@ -1935,11 +1960,12 @@ fin:
if (fin) {
shutdown(conn->sock, SHUT_WR);
- if (conn->state == ESTABLISHED)
+ if (conn->state == ESTABLISHED) {
tcp_tap_state(conn, FIN_WAIT_1);
- else
+ tcp_data_from_sock(c, conn, now);
+ } else {
tcp_tap_state(conn, LAST_ACK);
- return;
+ }
}
}
@@ -2023,12 +2049,12 @@ int tcp_tap_handler(struct ctx *c, int af, void *addr,
conn->seq_ack_to_tap = conn->seq_from_tap;
tcp_tap_state(conn, ESTABLISHED);
- tcp_send_to_tap(c, conn, ACK, NULL, 0);
/* The client might have sent data already, which we didn't
* dequeue waiting for SYN,ACK from tap -- check now.
*/
tcp_data_from_sock(c, conn, now);
+ tcp_send_to_tap(c, conn, 0, NULL, 0);
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
ref.s = conn->sock;
@@ -2055,11 +2081,13 @@ int tcp_tap_handler(struct ctx *c, int af, void *addr,
case CLOSE_WAIT:
tcp_data_from_tap(c, conn, msg, count, now);
return count;
+ case FIN_WAIT_1:
+ tcp_send_to_tap(c, conn, ACK, NULL, 0);
+ break;
case FIN_WAIT_1_SOCK_FIN:
if (th->ack)
tcp_tap_destroy(c, conn);
break;
- case FIN_WAIT_1:
case TAP_SYN_SENT:
case LAST_ACK:
case SPLICE_ACCEPTED:
@@ -2570,9 +2598,9 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
tcp_tap_state(conn, FIN_WAIT_1_SOCK_FIN);
shutdown(conn->sock, SHUT_RD);
tcp_data_from_sock(c, conn, now);
- tcp_send_to_tap(c, conn, FIN | ACK, NULL, 0);
tcp_sock_consume(conn, conn->seq_ack_from_tap);
- } else {
+ tcp_send_to_tap(c, conn, FIN | ACK, NULL, 0);
+ } else if (conn->state != ESTABLISHED_SOCK_FIN) {
tcp_tap_destroy(c, conn);
}
}
@@ -2731,7 +2759,7 @@ static void tcp_timer_one(struct ctx *c, struct tcp_tap_conn *conn,
if (sock_ms > ACK_INTERVAL) {
if (conn->seq_from_tap > conn->seq_ack_to_tap)
- tcp_send_to_tap(c, conn, ACK, NULL, 0);
+ tcp_send_to_tap(c, conn, 0, NULL, 0);
}
if (ack_tap_ms > ACK_TIMEOUT) {