aboutgitcodebugslistschat
diff options
context:
space:
mode:
author Stefano Brivio <sbrivio@redhat.com> 2025-12-01 22:54:57 +0100
committer Stefano Brivio <sbrivio@redhat.com> 2025-12-08 08:03:23 +0100
commit 920a479de40b58a81178e5d6e96c0eed30b992d5 (patch)
tree d35ef46eebe6feb3587d7989c4cbc179056e1151
parent f423e12e98b4ad30e14f7b1ab259f36d75cb1c04 (diff)
downloadpasst-920a479de40b58a81178e5d6e96c0eed30b992d5.tar
passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.gz
passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.bz2
passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.lz
passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.xz
passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.zst
passt-920a479de40b58a81178e5d6e96c0eed30b992d5.zip
tcp: Limit advertised window to available, not total sending buffer size
For non-local connections, we advertise the same window size as what the peer in turn advertises to us, and limit it to the buffer size reported via SO_SNDBUF.

That's not quite correct: in order to later avoid failures while queueing data to the socket, we need to limit the window to the available buffer size, not the total one.

Use the SIOCOUTQ ioctl and subtract the number of outbound queued bytes from the total buffer size, then clamp to this value.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
-rw-r--r-- README.md 2
-rw-r--r-- tcp.c 18
2 files changed, 17 insertions, 3 deletions
diff --git a/README.md b/README.md
index 897ae8b..8fdc0a3 100644
--- a/README.md
+++ b/README.md
@@ -291,7 +291,7 @@ speeding up local connections, and usually requiring NAT. _pasta_:
* ✅ all capabilities dropped, other than `CAP_NET_BIND_SERVICE` (if granted)
* ✅ with default options, user, mount, IPC, UTS, PID namespaces are detached
* ✅ no external dependencies (other than a standard C library)
-* ✅ restrictive seccomp profiles (33 syscalls allowed for _passt_, 43 for
+* ✅ restrictive seccomp profiles (34 syscalls allowed for _passt_, 43 for
_pasta_ on x86_64)
* ✅ examples of [AppArmor](/passt/tree/contrib/apparmor) and
[SELinux](/passt/tree/contrib/selinux) profiles available
diff --git a/tcp.c b/tcp.c
index 37aceed..28d3304 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1026,6 +1026,8 @@ void tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
* @tinfo: tcp_info from kernel, can be NULL if not pre-fetched
*
* Return: 1 if sequence or window were updated, 0 otherwise
+ *
+ * #syscalls ioctl
*/
int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
bool force_seq, struct tcp_info_linux *tinfo)
@@ -1108,9 +1110,21 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
if ((conn->flags & LOCAL) || tcp_rtt_dst_low(conn)) {
new_wnd_to_tap = tinfo->tcpi_snd_wnd;
} else {
+ uint32_t sendq;
+ int limit;
+
+ if (ioctl(s, SIOCOUTQ, &sendq)) {
+ debug_perror("SIOCOUTQ on socket %i, assuming 0", s);
+ sendq = 0;
+ }
tcp_get_sndbuf(conn);
- new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd,
- SNDBUF_GET(conn));
+
+ if ((int)sendq > SNDBUF_GET(conn)) /* Due to memory pressure? */
+ limit = 0;
+ else
+ limit = SNDBUF_GET(conn) - (int)sendq;
+
+ new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, limit);
}
new_wnd_to_tap = MIN(new_wnd_to_tap, MAX_WINDOW);