aboutgitcodebugslistschat
path: root/tcp.c
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2021-06-08 02:20:28 +0200
committerStefano Brivio <sbrivio@redhat.com>2021-06-08 02:20:28 +0200
commit90078ebc5911f2dc7cb2290e92db67b78a0b9f1e (patch)
treed55e269e7e3e2d1299271b0785cff3391802d78a /tcp.c
parent8b39b0b47f86e5bab88634cba1a8e9932e275f77 (diff)
downloadpasst-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.tar
passt-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.tar.gz
passt-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.tar.bz2
passt-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.tar.lz
passt-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.tar.xz
passt-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.tar.zst
passt-90078ebc5911f2dc7cb2290e92db67b78a0b9f1e.zip
tcp: Add support for kernels not exporting tcpi_snd_wnd via TCP_INFO
Before commit 8f7baad7f035 ("tcp: Add snd_wnd to TCP_INFO"), the kernel didn't export tcpi_snd_wnd via TCP_INFO, which means we don't know the receiver's window size on the socket side. To get TCP connections working in that case, ignore this value if it's zero during the handshake, and use the initial window value suggested by RFC 6928 (14 600 bytes, instead of 4 380 bytes) to keep network performance usable. To make TCP dynamics responsive enough in this case, also check the socket for available data whenever we get an ACK segment from the tap, instead of waiting until all the data from the tap is dequeued. While at it, fix the window scaling value sent for SYN and SYN, ACK segments: we want to advance the data pointer after writing the option, not increment the value itself. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'tcp.c')
-rw-r--r--tcp.c21
1 files changed, 15 insertions, 6 deletions
diff --git a/tcp.c b/tcp.c
index 4748461..d650166 100644
--- a/tcp.c
+++ b/tcp.c
@@ -327,7 +327,7 @@
#define MAX_WS 10
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
#define MSS_DEFAULT 536
-#define WINDOW_DEFAULT 4380
+#define WINDOW_DEFAULT 14600 /* RFC 6928 */
#define SYN_TIMEOUT 240000 /* ms */
#define ACK_TIMEOUT 3000
@@ -401,6 +401,7 @@ struct tcp_conn;
* @ws_allowed: Window scaling allowed
* @ws: Window scaling factor
* @tap_window: Last window size received from tap, scaled
+ * @no_snd_wnd: Kernel won't report window (without commit 8f7baad7f035)
* @ts_sock: Last activity timestamp from socket for timeout purposes
* @ts_tap: Last activity timestamp from tap for timeout purposes
* @ts_ack_tap: Last ACK segment timestamp from tap for timeout purposes
@@ -434,6 +435,7 @@ struct tcp_conn {
int ws_allowed;
int ws;
int tap_window;
+ int no_snd_wnd;
struct timespec ts_sock;
struct timespec ts_tap;
@@ -748,13 +750,18 @@ static int tcp_send_to_tap(struct ctx *c, int s, int flags, char *in, int len)
data += OPT_MSS_LEN - 2;
th->doff += OPT_MSS_LEN / 4;
- if (tc[s].ws_allowed && (ws = info.tcpi_snd_wscale)) {
+ /* Check if kernel includes commit:
+ * 8f7baad7f035 ("tcp: Add snd_wnd to TCP_INFO")
+ */
+ tc[s].no_snd_wnd = !info.tcpi_snd_wnd;
+
+ if (tc[s].ws_allowed && (ws = info.tcpi_snd_wscale) &&
+ !tc[s].no_snd_wnd) {
*data++ = OPT_NOP;
*data++ = OPT_WS;
*data++ = OPT_WS_LEN;
- *data = ws;
- *data += OPT_WS_LEN - 2;
+ *data++ = ws;
th->doff += (1 + OPT_WS_LEN) / 4;
}
@@ -798,10 +805,10 @@ static int tcp_send_to_tap(struct ctx *c, int s, int flags, char *in, int len)
th->source = tc[s].sock_port;
th->dest = tc[s].tap_port;
- if (!err) {
+ if (!err && !tc[s].no_snd_wnd) {
/* First value sent by receiver is not scaled */
th->window = htons(info.tcpi_snd_wnd >>
- ((flags & SYN) ? 0 : info.tcpi_snd_wscale));
+ (th->syn ? 0 : info.tcpi_snd_wscale));
} else {
th->window = htons(WINDOW_DEFAULT);
}
@@ -1344,6 +1351,8 @@ int tcp_tap_handler(struct ctx *c, int af, void *addr,
th->psh ? 0 : MSG_MORE))
break;
+ tcp_data_from_sock(c, s, now);
+
if (th->fin) {
shutdown(s, SHUT_WR);
if (tc[s].s == ESTABLISHED)