diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2021-10-04 22:01:16 +0200 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2021-10-04 22:20:43 +0200 |
commit | e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389 (patch) | |
tree | 8830b0676c8d27435b8750765ec26eac984c367c | |
parent | f6bff339a95ea19852a6a5d841d141e6155f662e (diff) | |
download | passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.gz passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.bz2 passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.lz passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.xz passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.tar.zst passt-e1a2e2780c91eb83f4cf3dfc8bb8b8ad286f3389.zip |
tcp: Check if connection is local or low RTT was seen before using large MSS
If the connection is local, or if the RTT seen for it was comparable to
the time it takes to queue a batch of messages, we can safely use a large
MSS regardless of the sending buffer. Otherwise, we can't.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- | tcp.c | 59 |
1 file changed, 59 insertions, 0 deletions
```diff
@@ -369,6 +369,9 @@

 #define PORT_DETECT_INTERVAL		1000

+#define LOW_RTT_TABLE_SIZE		8
+#define LOW_RTT_THRESHOLD		5 /* us */
+
 /* We need to include <linux/tcp.h> for tcpi_bytes_acked, instead of
  * <netinet/tcp.h>, but that doesn't include a definition for SOL_TCP
  */
@@ -440,6 +443,7 @@ struct tcp_tap_conn;
  * @a.a4.a:		IPv4 address
  * @tap_port:		Guest-facing tap port
  * @sock_port:		Remote, socket-facing port
+ * @local:		Destination is local
  * @state:		TCP connection state
  * @seq_to_tap:		Next sequence for packets to tap
  * @seq_ack_from_tap:	Last ACK number received from tap
@@ -476,6 +480,7 @@ struct tcp_tap_conn {
 	} a;
 	in_port_t tap_port;
 	in_port_t sock_port;
+	int local;
 	enum tcp_state state;

 	uint32_t seq_to_tap;
@@ -536,6 +541,9 @@ static int tcp_sock_init_lo	[USHRT_MAX][IP_VERSIONS];
 static int tcp_sock_init_ext	[USHRT_MAX][IP_VERSIONS];
 static int tcp_sock_ns		[USHRT_MAX][IP_VERSIONS];

+/* Table of destinations with very low RTT (assumed to be local), LRU */
+static struct in6_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
+
 /**
  * tcp_remap_to_tap() - Set delta for port translation toward guest/tap
  * @port:	Original destination port, host order
@@ -681,6 +689,47 @@ static int ns_sock_pool4	[TCP_SOCK_POOL_SIZE];
 static int ns_sock_pool6	[TCP_SOCK_POOL_SIZE];

 /**
+ * tcp_rtt_dst_low() - Check if low RTT was seen for connection endpoint
+ * @conn:	Connection pointer
+ * Return: 1 if destination is in low RTT table, 0 otherwise
+ */
+static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
+{
+	int i;
+
+	for (i = 0; i < LOW_RTT_TABLE_SIZE; i++)
+		if (!memcmp(&conn->a.a6, low_rtt_dst + i, sizeof(conn->a.a6)))
+			return 1;
+
+	return 0;
+}
+
+/**
+ * tcp_rtt_dst_check() - Check tcpi_min_rtt, insert endpoint in table if low
+ * @conn:	Connection pointer
+ * @info:	Pointer to struct tcp_info for socket
+ */
+static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *info)
+{
+	int i, hole = -1;
+
+	if (!info->tcpi_min_rtt || (int)info->tcpi_min_rtt > LOW_RTT_THRESHOLD)
+		return;
+
+	for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) {
+		if (!memcmp(&conn->a.a6, low_rtt_dst + i, sizeof(conn->a.a6)))
+			return;
+		if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i))
+			hole = i;
+	}
+
+	memcpy(low_rtt_dst + hole++, &conn->a.a6, sizeof(conn->a.a6));
+	if (hole == LOW_RTT_TABLE_SIZE)
+		hole = 0;
+	memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
+}
+
+/**
  * tcp_tap_state() - Set given TCP state for tap connection, report to stderr
  * @conn:	Connection pointer
  * @state:	New TCP state to be set
@@ -1258,6 +1307,11 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags,
 			mss -= sizeof(struct iphdr);
 		else
 			mss -= sizeof(struct ipv6hdr);
+
+		if (!conn->local && !tcp_rtt_dst_low(conn))
+			mss = MIN(mss, PAGE_SIZE);
+		else
+			mss = ROUND_DOWN(mss, PAGE_SIZE);
 	}

 	*(uint16_t *)data = htons(mss);
@@ -1588,6 +1642,11 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,

 	tcp_hash_insert(c, conn, af, addr);

+	if (!bind(s, sa, sl))
+		tcp_rst(c, conn);	/* Nobody is listening then */
+	if (errno != EADDRNOTAVAIL)
+		conn->local = 1;
+
 	if (connect(s, sa, sl)) {
 		tcp_tap_state(conn, TAP_SYN_SENT);

```