diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2024-06-18 12:32:17 +0200 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2024-06-19 15:00:55 +0200 |
commit | 54a9d3801b9549e68bd169e2c938c265ef46e973 (patch) | |
tree | aeaf74713d3e9ce7bb6f0e9e2e404d608c2bae8a | |
parent | 020ff7a40eb5daa5bbaa0afd6b9319cbfb143b01 (diff) | |
download | passt-54a9d3801b9549e68bd169e2c938c265ef46e973.tar passt-54a9d3801b9549e68bd169e2c938c265ef46e973.tar.gz passt-54a9d3801b9549e68bd169e2c938c265ef46e973.tar.bz2 passt-54a9d3801b9549e68bd169e2c938c265ef46e973.tar.lz passt-54a9d3801b9549e68bd169e2c938c265ef46e973.tar.xz passt-54a9d3801b9549e68bd169e2c938c265ef46e973.tar.zst passt-54a9d3801b9549e68bd169e2c938c265ef46e973.zip |
tcp: Don't rely on bind() to fail to decide that connection target is valid
Commit e1a2e2780c91 ("tcp: Check if connection is local or low RTT
was seen before using large MSS") added a call to bind() before we
issue a connect() to the target for an outbound connection.
If bind() fails, but with neither EADDRNOTAVAIL nor EACCES, we
can conclude that the target address is a local (host) address, and we
can use an unlimited MSS.
While at it, according to the reasoning of that commit, if bind()
succeeds, we would know right away that nobody is listening at that
(local) address and port, and we don't even need to call connect(): we
can just fail early and reset the connection attempt.
But if non-local binds are enabled via net.ipv4.ip_nonlocal_bind or
net.ipv6.ip_nonlocal_bind sysctl, binding to a non-local address will
actually succeed, so we can't rely on it to fail in general.
The visible issue with the existing behaviour is that we would reset
any outbound connection to non-local addresses, if non-local binds are
enabled.
Keep the significant optimisation for local addresses along with the
bind() call, but if it succeeds, don't draw any conclusion: close the
socket, grab another one, and proceed normally.
This will incur a small latency penalty if non-local binds are
enabled (we'll likely fetch an existing socket from the pool but
additionally call close()), or if the target is local but not bound:
we'll need to call connect() and get a failure before relaying that
failure back.
Link: https://github.com/containers/podman/issues/23003
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
-rw-r--r-- | tcp.c | 48 |
1 files changed, 31 insertions, 17 deletions
@@ -1631,6 +1631,9 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, flow_initiate(flow, PIF_TAP); + flow_target(flow, PIF_HOST); + conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); + if (af == AF_INET) { if (IN4_IS_ADDR_UNSPECIFIED(saddr) || IN4_IS_ADDR_BROADCAST(saddr) || @@ -1647,6 +1650,9 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } + + sa = (struct sockaddr *)&addr4; + sl = sizeof(addr4); } else if (af == AF_INET6) { if (IN6_IS_ADDR_UNSPECIFIED(saddr) || IN6_IS_ADDR_MULTICAST(saddr) || srcport == 0 || @@ -1661,6 +1667,11 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } + + sa = (struct sockaddr *)&addr6; + sl = sizeof(addr6); + } else { + ASSERT(0); } if ((s = tcp_conn_sock(c, af)) < 0) @@ -1673,6 +1684,26 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, addr6.sin6_addr = in6addr_loopback; } + /* Use bind() to check if the target address is local (EADDRINUSE or + * similar) and already bound, and set the LOCAL flag in that case. + * + * If bind() succeeds, in general, we could infer that nobody (else) is + * listening on that address and port and reset the connection attempt + * early, but we can't rely on that if non-local binds are enabled, + * because bind() would succeed for any non-local address we can reach. + * + * So, if bind() succeeds, close the socket, get a new one, and proceed. 
+ */ + if (bind(s, sa, sl)) { + if (errno != EADDRNOTAVAIL && errno != EACCES) + conn_flag(c, conn, LOCAL); + } else { + /* Not a local, bound destination, inconclusive test */ + close(s); + if ((s = tcp_conn_sock(c, af)) < 0) + goto cancel; + } + if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) { struct sockaddr_in6 addr6_ll = { .sin6_family = AF_INET6, @@ -1683,8 +1714,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, goto cancel; } - flow_target(flow, PIF_HOST); - conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; conn->timer = -1; conn_event(c, conn, TAP_SYN_RCVD); @@ -1706,14 +1735,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, inany_from_af(&conn->faddr, af, daddr); - if (af == AF_INET) { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } else { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } - conn->fport = dstport; conn->eport = srcport; @@ -1726,13 +1747,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, tcp_hash_insert(c, conn); - if (!bind(s, sa, sl)) { - tcp_rst(c, conn); /* Nobody is listening then */ - goto cancel; - } - if (errno != EADDRNOTAVAIL && errno != EACCES) - conn_flag(c, conn, LOCAL); - if ((af == AF_INET && !IN4_IS_ADDR_LOOPBACK(&addr4.sin_addr)) || (af == AF_INET6 && !IN6_IS_ADDR_LOOPBACK(&addr6.sin6_addr) && !IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr))) |