diff options
| author | Stefano Brivio <sbrivio@redhat.com> | 2025-12-01 22:54:57 +0100 |
|---|---|---|
| committer | Stefano Brivio <sbrivio@redhat.com> | 2025-12-08 08:03:23 +0100 |
| commit | 920a479de40b58a81178e5d6e96c0eed30b992d5 (patch) | |
| tree | d35ef46eebe6feb3587d7989c4cbc179056e1151 | |
| parent | f423e12e98b4ad30e14f7b1ab259f36d75cb1c04 (diff) | |
| download | passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.gz passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.bz2 passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.lz passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.xz passt-920a479de40b58a81178e5d6e96c0eed30b992d5.tar.zst passt-920a479de40b58a81178e5d6e96c0eed30b992d5.zip | |
tcp: Limit advertised window to available, not total sending buffer size
For non-local connections, we advertise the same window size that the
peer, in turn, advertises to us, and limit it to the buffer size
reported via SO_SNDBUF.
That's not quite correct: to avoid later failures while queueing data
to the socket, we need to limit the window to the available buffer
size, not the total one.
Use the SIOCOUTQ ioctl to fetch the number of outbound queued bytes,
subtract it from the total buffer size, and clamp the advertised window to the result.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | tcp.c | 18 |
2 files changed, 17 insertions, 3 deletions
@@ -291,7 +291,7 @@ speeding up local connections, and usually requiring NAT. _pasta_: * ✅ all capabilities dropped, other than `CAP_NET_BIND_SERVICE` (if granted) * ✅ with default options, user, mount, IPC, UTS, PID namespaces are detached * ✅ no external dependencies (other than a standard C library) -* ✅ restrictive seccomp profiles (33 syscalls allowed for _passt_, 43 for +* ✅ restrictive seccomp profiles (34 syscalls allowed for _passt_, 43 for _pasta_ on x86_64) * ✅ examples of [AppArmor](/passt/tree/contrib/apparmor) and [SELinux](/passt/tree/contrib/selinux) profiles available @@ -1026,6 +1026,8 @@ void tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn, * @tinfo: tcp_info from kernel, can be NULL if not pre-fetched * * Return: 1 if sequence or window were updated, 0 otherwise + * + * #syscalls ioctl */ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, bool force_seq, struct tcp_info_linux *tinfo) @@ -1108,9 +1110,21 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, if ((conn->flags & LOCAL) || tcp_rtt_dst_low(conn)) { new_wnd_to_tap = tinfo->tcpi_snd_wnd; } else { + uint32_t sendq; + int limit; + + if (ioctl(s, SIOCOUTQ, &sendq)) { + debug_perror("SIOCOUTQ on socket %i, assuming 0", s); + sendq = 0; + } tcp_get_sndbuf(conn); - new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, - SNDBUF_GET(conn)); + + if ((int)sendq > SNDBUF_GET(conn)) /* Due to memory pressure? */ + limit = 0; + else + limit = SNDBUF_GET(conn) - (int)sendq; + + new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, limit); } new_wnd_to_tap = MIN(new_wnd_to_tap, MAX_WINDOW); |
