aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2025-04-10 17:16:40 +1000
committerStefano Brivio <sbrivio@redhat.com>2025-04-10 19:46:16 +0200
commit59cc89f4cc018988428637d97745cc4c919126cb (patch)
tree12688ff4bf049d5151bd86b12098680b7a6b11d5
parent695c62396eb3f4627c1114ce444394e3ba34373a (diff)
downloadpasst-59cc89f4cc018988428637d97745cc4c919126cb.tar
passt-59cc89f4cc018988428637d97745cc4c919126cb.tar.gz
passt-59cc89f4cc018988428637d97745cc4c919126cb.tar.bz2
passt-59cc89f4cc018988428637d97745cc4c919126cb.tar.lz
passt-59cc89f4cc018988428637d97745cc4c919126cb.tar.xz
passt-59cc89f4cc018988428637d97745cc4c919126cb.tar.zst
passt-59cc89f4cc018988428637d97745cc4c919126cb.zip
udp, udp_flow: Track our specific address on socket interfaces
So far for UDP flows (like TCP connections) we didn't record our address (oaddr) in the flow table entry for socket based pifs. That's because we didn't have that information when a flow was initiated by a datagram coming to a "listening" socket with 0.0.0.0 or :: address. Even when we did have the information, we didn't record it, to simplify address matching on lookups. This meant that in some circumstances we could send replies on a UDP flow from a different address than the originating request came to, which is surprising and breaks certain setups. We now have code in udp_peek_addr() which does determine our address for incoming UDP datagrams. We can use that information to properly populate oaddr in the flow table for flow initiated from a socket. In order to be able to consistently match datagrams to flows, we must *always* have a specific oaddr, not an unspecified address (that's how the flow hash table works). So, we also need to fill in oaddr correctly for flows we initiate *to* sockets. Our forwarding logic doesn't specify oaddr here, letting the kernel decide based on the routing table. In this case we need to call getsockname() after connect()ing the socket to find which local address the kernel picked. This adds getsockname() to our seccomp profile for all variants. Link: https://bugs.passt.top/show_bug.cgi?id=99 Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--flow.c14
-rw-r--r--flow.h3
-rw-r--r--flow_table.h1
-rw-r--r--tcp.c2
-rw-r--r--udp.c4
-rw-r--r--udp_flow.c36
-rw-r--r--udp_flow.h3
-rw-r--r--util.h10
8 files changed, 61 insertions, 12 deletions
diff --git a/flow.c b/flow.c
index 29a83e1..3c81cb4 100644
--- a/flow.c
+++ b/flow.c
@@ -396,18 +396,22 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
* @flow: Flow to change state
* @pif: pif of the initiating side
* @ssa: Source socket address
+ * @daddr: Destination address (may be NULL)
* @dport: Destination port
*
* Return: pointer to the initiating flowside information
*/
struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
const union sockaddr_inany *ssa,
+ const union inany_addr *daddr,
in_port_t dport)
{
struct flowside *ini = &flow->f.side[INISIDE];
inany_from_sockaddr(&ini->eaddr, &ini->eport, ssa);
- if (inany_v4(&ini->eaddr))
+ if (daddr)
+ ini->oaddr = *daddr;
+ else if (inany_v4(&ini->eaddr))
ini->oaddr = inany_any4;
else
ini->oaddr = inany_any6;
@@ -751,19 +755,23 @@ flow_sidx_t flow_lookup_af(const struct ctx *c,
* @proto: Protocol of the flow (IP L4 protocol number)
* @pif: Interface of the flow
* @esa: Socket address of the endpoint
+ * @oaddr: Our address (may be NULL)
* @oport: Our port number
*
* Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
*/
flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
- const void *esa, in_port_t oport)
+ const void *esa,
+ const union inany_addr *oaddr, in_port_t oport)
{
struct flowside side = {
.oport = oport,
};
inany_from_sockaddr(&side.eaddr, &side.eport, esa);
- if (inany_v4(&side.eaddr))
+ if (oaddr)
+ side.oaddr = *oaddr;
+ else if (inany_v4(&side.eaddr))
side.oaddr = inany_any4;
else
side.oaddr = inany_any6;
diff --git a/flow.h b/flow.h
index dcf7645..cac618a 100644
--- a/flow.h
+++ b/flow.h
@@ -243,7 +243,8 @@ flow_sidx_t flow_lookup_af(const struct ctx *c,
const void *eaddr, const void *oaddr,
in_port_t eport, in_port_t oport);
flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
- const void *esa, in_port_t oport);
+ const void *esa,
+ const union inany_addr *oaddr, in_port_t oport);
union flow;
diff --git a/flow_table.h b/flow_table.h
index fd2c57b..2d5c65c 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -199,6 +199,7 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
const void *daddr, in_port_t dport);
struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
const union sockaddr_inany *ssa,
+ const union inany_addr *daddr,
in_port_t dport);
const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
sa_family_t af,
diff --git a/tcp.c b/tcp.c
index 35626c9..9c6bc52 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2201,7 +2201,7 @@ void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
* mode only, below.
*/
ini = flow_initiate_sa(flow, ref.tcp_listen.pif, &sa,
- ref.tcp_listen.port);
+ NULL, ref.tcp_listen.port);
if (c->mode == MODE_VU) { /* Rebind to same address after migration */
if (!getsockname(s, &sa.sa, &sl))
diff --git a/udp.c b/udp.c
index a71141a..40af7df 100644
--- a/udp.c
+++ b/udp.c
@@ -737,8 +737,8 @@ void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
union inany_addr dst;
while (udp_peek_addr(s, &src, &dst) == 0) {
- flow_sidx_t tosidx = udp_flow_from_sock(c, frompif, port,
- &src, now);
+ flow_sidx_t tosidx = udp_flow_from_sock(c, frompif,
+ &dst, port, &src, now);
uint8_t topif = pif_at_sidx(tosidx);
if (pif_is_socket(topif)) {
diff --git a/udp_flow.c b/udp_flow.c
index 75f5a0b..ef2cbb0 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -123,14 +123,17 @@ static int udp_flow_sock(const struct ctx *c,
* @now: Timestamp
*
* Return: UDP specific flow, if successful, NULL on failure
+ *
+ * #syscalls getsockname
*/
static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
const struct timespec *now)
{
struct udp_flow *uflow = NULL;
+ const struct flowside *tgt;
unsigned sidei;
- if (!flow_target(c, flow, IPPROTO_UDP))
+ if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
goto cancel;
uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
@@ -144,6 +147,29 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
goto cancel;
}
+ if (uflow->s[TGTSIDE] >= 0 && inany_is_unspecified(&tgt->oaddr)) {
+ /* When we target a socket, we connect() it, but might not
+ * always bind(), leaving the kernel to pick our address. In
+ * that case connect() will implicitly bind() the socket, but we
+ * need to determine its local address so that we can match
+ * reply packets back to the correct flow. Update the flow with
+ * the information from getsockname() */
+ union sockaddr_inany sa;
+ socklen_t sl = sizeof(sa);
+ in_port_t port;
+
+ if (getsockname(uflow->s[TGTSIDE], &sa.sa, &sl) < 0) {
+ flow_perror(uflow, "Unable to determine local address");
+ goto cancel;
+ }
+ inany_from_sockaddr(&uflow->f.side[TGTSIDE].oaddr,
+ &port, &sa);
+ if (port != tgt->oport) {
+ flow_err(uflow, "Unexpected local port");
+ goto cancel;
+ }
+ }
+
/* Tap sides always need to be looked up by hash. Socket sides don't
* always, but sometimes do (receiving packets on a socket not specific
* to one flow). Unconditionally hash both sides so all our bases are
@@ -167,6 +193,7 @@ cancel:
* udp_flow_from_sock() - Find or create UDP flow for incoming datagram
* @c: Execution context
* @pif: Interface the datagram is arriving from
+ * @dst: Our (local) address to which the datagram is arriving
* @port: Our (local) port number to which the datagram is arriving
* @s_in: Source socket address, filled in by recvmmsg()
* @now: Timestamp
@@ -176,7 +203,8 @@ cancel:
* Return: sidx for the destination side of the flow for this packet, or
* FLOW_SIDX_NONE if we couldn't find or create a flow.
*/
-flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, in_port_t port,
+flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
+ const union inany_addr *dst, in_port_t port,
const union sockaddr_inany *s_in,
const struct timespec *now)
{
@@ -185,7 +213,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, in_port_t port,
union flow *flow;
flow_sidx_t sidx;
- sidx = flow_lookup_sa(c, IPPROTO_UDP, pif, s_in, port);
+ sidx = flow_lookup_sa(c, IPPROTO_UDP, pif, s_in, dst, port);
if ((uflow = udp_at_sidx(sidx))) {
uflow->ts = now->tv_sec;
return flow_sidx_opposite(sidx);
@@ -199,7 +227,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, in_port_t port,
return FLOW_SIDX_NONE;
}
- ini = flow_initiate_sa(flow, pif, s_in, port);
+ ini = flow_initiate_sa(flow, pif, s_in, dst, port);
if (!inany_is_unicast(&ini->eaddr) ||
ini->eport == 0 || ini->oport == 0) {
diff --git a/udp_flow.h b/udp_flow.h
index e289122..4c528e9 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -32,7 +32,8 @@ struct udp_flow {
};
struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
-flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, in_port_t port,
+flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
+ const union inany_addr *dst, in_port_t port,
const union sockaddr_inany *s_in,
const struct timespec *now);
flow_sidx_t udp_flow_from_tap(const struct ctx *c,
diff --git a/util.h b/util.h
index b1e7e79..cc7d084 100644
--- a/util.h
+++ b/util.h
@@ -371,6 +371,16 @@ static inline int wrap_accept4(int sockfd, struct sockaddr *addr,
#define accept4(s, addr, addrlen, flags) \
wrap_accept4((s), (addr), (addrlen), (flags))
+static inline int wrap_getsockname(int sockfd, struct sockaddr *addr,
+/* cppcheck-suppress constParameterPointer */
+ socklen_t *addrlen)
+{
+ sa_init(addr, addrlen);
+ return getsockname(sockfd, addr, addrlen);
+}
+#define getsockname(s, addr, addrlen) \
+ wrap_getsockname((s), (addr), (addrlen))
+
#define PASST_MAXDNAME 254 /* 253 (RFC 1035) + 1 (the terminator) */
void encode_domain_name(char *buf, const char *domain_name);