aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2021-10-04 22:01:16 +0200
committerStefano Brivio <sbrivio@redhat.com>2021-10-04 22:20:43 +0200
commite1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389 (patch)
tree8830b0676c8d27435b8750765ec26eac984c367c
parentf6bff339a95ea19852a6a5d841d141e6155f662e (diff)
downloadpasst-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar
passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.gz
passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.bz2
passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.lz
passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.xz
passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.zst
passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.zip
tcp: Check if connection is local or low RTT was seen before using large MSS
If the connection is local, or if the RTT we saw for its destination is comparable to the time it takes to queue a batch of messages, we can safely use a large MSS regardless of the size of the sending buffer. Otherwise, we can't. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--tcp.c59
1 files changed, 59 insertions, 0 deletions
diff --git a/tcp.c b/tcp.c
index 5d034cb..e001df4 100644
--- a/tcp.c
+++ b/tcp.c
@@ -369,6 +369,9 @@
#define PORT_DETECT_INTERVAL 1000
+#define LOW_RTT_TABLE_SIZE 8 /* Number of slots in low_rtt_dst */
+#define LOW_RTT_THRESHOLD 5 /* us: non-zero tcpi_min_rtt up to this counts as low */
+
/* We need to include <linux/tcp.h> for tcpi_bytes_acked, instead of
* <netinet/tcp.h>, but that doesn't include a definition for SOL_TCP
*/
@@ -440,6 +443,7 @@ struct tcp_tap_conn;
* @a.a4.a: IPv4 address
* @tap_port: Guest-facing tap port
* @sock_port: Remote, socket-facing port
+ * @local: Destination is local
* @state: TCP connection state
* @seq_to_tap: Next sequence for packets to tap
* @seq_ack_from_tap: Last ACK number received from tap
@@ -476,6 +480,7 @@ struct tcp_tap_conn {
} a;
in_port_t tap_port;
in_port_t sock_port;
+ int local;
enum tcp_state state;
uint32_t seq_to_tap;
@@ -536,6 +541,9 @@ static int tcp_sock_init_lo [USHRT_MAX][IP_VERSIONS];
static int tcp_sock_init_ext [USHRT_MAX][IP_VERSIONS];
static int tcp_sock_ns [USHRT_MAX][IP_VERSIONS];
+/* Table of destinations with very low RTT (assumed to be local). An all-zero
+ * entry is an empty slot. tcp_rtt_dst_check() fills the first empty slot and
+ * clears the one after it (wrapping around), so entries are replaced in
+ * insertion order; lookups don't reorder them.
+ */
+static struct in6_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
+
/**
* tcp_remap_to_tap() - Set delta for port translation toward guest/tap
* @port: Original destination port, host order
@@ -681,6 +689,47 @@ static int ns_sock_pool4 [TCP_SOCK_POOL_SIZE];
static int ns_sock_pool6 [TCP_SOCK_POOL_SIZE];
/**
+ * tcp_rtt_dst_low() - Look up connection address in table of low RTT endpoints
+ * @conn:	Connection pointer
+ *
+ * Every slot of low_rtt_dst is compared byte by byte against the address
+ * stored in @conn.
+ *
+ * Return: 1 if a slot matches, 0 if none does
+ */
+static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
+{
+	struct in6_addr *slot = low_rtt_dst;
+	struct in6_addr *end = low_rtt_dst + LOW_RTT_TABLE_SIZE;
+
+	while (slot < end) {
+		if (memcmp(&conn->a.a6, slot, sizeof(conn->a.a6)) == 0)
+			return 1;
+		slot++;
+	}
+
+	return 0;
+}
+
+/**
+ * tcp_rtt_dst_check() - Check tcpi_min_rtt, insert endpoint in table if low
+ * @conn:	Connection pointer
+ * @info:	Pointer to struct tcp_info for socket
+ *
+ * Nothing is recorded if tcpi_min_rtt is zero or above LOW_RTT_THRESHOLD, or
+ * if the address of @conn is already in low_rtt_dst. Otherwise, the address
+ * is stored in the first empty (all-zero) slot, and the slot after it,
+ * wrapping around, is cleared, so that an empty slot is left for the next
+ * insertion.
+ */
+static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *info)
+{
+	int i, hole = -1;
+
+	/* Compare as unsigned (tcpi_min_rtt is assumed to be an unsigned 32-bit
+	 * field): casting it to int would turn values above INT_MAX into
+	 * negative numbers, which would then pass as low RTT
+	 */
+	if (!info->tcpi_min_rtt ||
+	    info->tcpi_min_rtt > (unsigned)LOW_RTT_THRESHOLD)
+		return;
+
+	for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) {
+		if (!memcmp(&conn->a.a6, low_rtt_dst + i, sizeof(conn->a.a6)))
+			return;
+		if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i))
+			hole = i;
+	}
+
+	/* The second memcpy() below always leaves an empty slot behind, so this
+	 * isn't expected to happen: bail out anyway rather than writing before
+	 * the start of the table
+	 */
+	if (hole == -1)
+		return;
+
+	memcpy(low_rtt_dst + hole++, &conn->a.a6, sizeof(conn->a.a6));
+	if (hole == LOW_RTT_TABLE_SIZE)
+		hole = 0;
+	memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
+}
+
+/**
* tcp_tap_state() - Set given TCP state for tap connection, report to stderr
* @conn: Connection pointer
* @state: New TCP state to be set
@@ -1258,6 +1307,11 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags,
mss -= sizeof(struct iphdr);
else
mss -= sizeof(struct ipv6hdr);
+
+ if (!conn->local && !tcp_rtt_dst_low(conn))
+ mss = MIN(mss, PAGE_SIZE);
+ else
+ mss = ROUND_DOWN(mss, PAGE_SIZE);
}
*(uint16_t *)data = htons(mss);
@@ -1588,6 +1642,11 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
tcp_hash_insert(c, conn, af, addr);
+ if (!bind(s, sa, sl))
+ tcp_rst(c, conn); /* Nobody is listening then */
+ if (errno != EADDRNOTAVAIL)
+ conn->local = 1;
+
if (connect(s, sa, sl)) {
tcp_tap_state(conn, TAP_SYN_SENT);