aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2025-01-21 00:39:06 +0100
committerStefano Brivio <sbrivio@redhat.com>2025-01-21 14:28:44 +0100
commitec5c4d936dafcbc5e07caeb594dfd771050da221 (patch)
treef9839e90cd6075e98521808188c5c0d03aac80c4
parentdb2c91ae86c7c0d1d068714db2342b9057506148 (diff)
downloadpasst-ec5c4d936dafcbc5e07caeb594dfd771050da221.tar
passt-ec5c4d936dafcbc5e07caeb594dfd771050da221.tar.gz
passt-ec5c4d936dafcbc5e07caeb594dfd771050da221.tar.bz2
passt-ec5c4d936dafcbc5e07caeb594dfd771050da221.tar.lz
passt-ec5c4d936dafcbc5e07caeb594dfd771050da221.tar.xz
passt-ec5c4d936dafcbc5e07caeb594dfd771050da221.tar.zst
passt-ec5c4d936dafcbc5e07caeb594dfd771050da221.zip
tcp: Set PSH flag for last incoming packets in a batch
So far we omitted setting PSH flags for inbound traffic altogether: as we ignore the nature of the data we're sending, we can't conclude that some data is more or less urgent. This works fine with Linux guests, as the Linux kernel doesn't do much with it, on input: it will generally deliver data to the application layer without delay. However, with Windows, things change: if we don't set the PSH flag on interactive inbound traffic, we can expect long delays before the data is delivered to the application. This is very visible with RDP, where packets we send on behalf of the RDP client are delivered with delays exceeding one second: $ tshark -r rdp.pcap -td -Y 'frame.number in { 33170 .. 33173 }' --disable-protocol tls 33170 0.030296 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285229 Win=387968 Len=0 33171 0.985412 88.198.0.164 → 93.235.154.248 105 TCP 3389 → 49012 [PSH, ACK] Seq=285229 Ack=13820 Win=63198 Len=51 33172 0.030373 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285280 Win=387968 Len=0 33173 1.383776 88.198.0.164 → 93.235.154.248 424 TCP 3389 → 49012 [PSH, ACK] Seq=285280 Ack=13820 Win=63198 Len=370 in this example (packet capture taken by passt), frame #33172 is a mouse event sent by the RDP client, and frame #33173 is the first event (display reacting to click) sent back by the server. This appears as a 1.4 s delay before we get frame #33173. If we set PSH, instead: $ tshark -r rdp_psh.pcap -td -Y 'frame.number in { 314 .. 317 }' --disable-protocol tls 314 0.002503 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7779 Ack=74047 Win=31872 Len=116 315 0.000557 88.198.0.164 → 93.235.154.248 54 TCP 3389 → 51066 [ACK] Seq=79162 Ack=7895 Win=62872 Len=0 316 0.012752 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7895 Ack=79162 Win=31872 Len=116 317 0.011927 88.198.0.164 → 93.235.154.248 107 TCP 3389 → 51066 [PSH, ACK] Seq=79162 Ack=8011 Win=62756 Len=53 here, in frame #316, our mouse event is delivered without a delay and receives a response in approximately 12 ms. Set PSH on the last segment for any batch we dequeue from the socket, that is, set it whenever we know that we might not be sending data to the same port for a while. Reported-by: NN708 Link: https://bugs.passt.top/show_bug.cgi?id=107 Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--tcp_buf.c11
-rw-r--r--tcp_vu.c7
2 files changed, 13 insertions, 5 deletions
diff --git a/tcp_buf.c b/tcp_buf.c
index cbefa42..72d99c5 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -239,9 +239,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
* @dlen: TCP payload length
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
* @seq: Sequence number to be sent
+ * @push: Set PSH flag, last segment in a batch
*/
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
- ssize_t dlen, int no_csum, uint32_t seq)
+ ssize_t dlen, int no_csum, uint32_t seq, bool push)
{
struct tcp_payload_t *payload;
const uint16_t *check = NULL;
@@ -268,6 +269,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
payload->th.th_x2 = 0;
payload->th.th_flags = 0;
payload->th.ack = 1;
+ payload->th.psh = push;
iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
tcp_l2_buf_fill_headers(conn, iov, check, seq, false);
if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
@@ -402,11 +404,14 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
seq = conn->seq_to_tap;
for (i = 0; i < send_bufs; i++) {
int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
+ bool push = false;
- if (i == send_bufs - 1)
+ if (i == send_bufs - 1) {
dlen = last_len;
+ push = true;
+ }
- tcp_data_to_tap(c, conn, dlen, no_csum, seq);
+ tcp_data_to_tap(c, conn, dlen, no_csum, seq, push);
seq += dlen;
}
diff --git a/tcp_vu.c b/tcp_vu.c
index a216bb1..fad7065 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -289,10 +289,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
* @iov_cnt: Number of entries in @iov
* @check: Checksum, if already known
* @no_tcp_csum: Do not set TCP checksum
+ * @push: Set PSH flag, last segment in a batch
*/
static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
struct iovec *iov, size_t iov_cnt,
- const uint16_t **check, bool no_tcp_csum)
+ const uint16_t **check, bool no_tcp_csum, bool push)
{
const struct flowside *toside = TAPFLOW(conn);
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
@@ -334,6 +335,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
memset(th, 0, sizeof(*th));
th->doff = sizeof(*th) / 4;
th->ack = 1;
+ th->psh = push;
tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
*check, conn->seq_to_tap, no_tcp_csum);
@@ -443,6 +445,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
struct iovec *iov = &elem[head[i]].in_sg[0];
int buf_cnt = head[i + 1] - head[i];
ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen;
+ bool push = i == head_cnt - 1;
vu_set_vnethdr(vdev, iov->iov_base, buf_cnt);
@@ -451,7 +454,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
check = NULL;
previous_dlen = dlen;
- tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap);
+ tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
if (*c->pcap) {
pcap_iov(iov, buf_cnt,