From f091893c1ffe1a531989a599737031089f6cfcb4 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 27 Dec 2023 14:46:39 +0100 Subject: netlink: Fetch most specific (longest prefix) address in nl_addr_get() This happened in most cases implicitly before commit eff3bcb24547 ("netlink: Split nl_addr() into separate operation functions"): while going through results from netlink, we would only copy an address into the provided return buffer if no address had been picked yet. Because of the insertion logic in the kernel (ipv6_link_dev_addr()), the first returned address would also be the one added last, and, in case of a Linux guest using a DHCPv6 client as well as SLAAC, that would be the address assigned via DHCPv6, because SLAAC happens before the DHCPv6 exchange. The effect of, instead, picking the last returned address (first assigned) is visible when passt or pasta runs nested, given that, by default, they advertise a prefix for SLAAC usage, plus an address via DHCPv6. The first level (L1 guest) would get a /64 address by means of SLAAC, and a /128 address via DHCPv6, the latter matching the address on the host. The second level (L2 guest) would also get two addresses: a /64 via SLAAC (same prefix as the host), and a /128 via DHCPv6, matching the the L1 SLAAC-assigned address, not the one obtained via DHCPv6. That is, none of the L2 addresses would match the address on the host. The whole point of having a DHCPv6 server is to avoid (implicit) NAT when possible, though. Fix this in a more explicit way than the behaviour we initially had: pick the first address among the set of most specific ones, by comparing prefix lengths. Do this for IPv4 and for link-local addresses, too, to match in any case the implementation of the default source address selection. Reported-by: Yalan Zhang Fixes: eff3bcb24547 ("netlink: Split nl_addr() into separate operation functions") Signed-off-by: Stefano Brivio --- netlink.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'netlink.c') diff --git a/netlink.c b/netlink.c index 379d46e..bf79dd4 100644 --- a/netlink.c +++ b/netlink.c @@ -527,7 +527,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src, } /** - * nl_addr_get() - Get IP address for given interface and address family + * nl_addr_get() - Get most specific global address, given interface and family * @s: Netlink socket * @ifi: Interface index in outer network namespace * @af: Address family @@ -540,6 +540,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src, int nl_addr_get(int s, unsigned int ifi, sa_family_t af, void *addr, int *prefix_len, void *addr_l) { + uint8_t prefix_max = 0, prefix_max_ll = 0; struct req_t { struct nlmsghdr nlh; struct ifaddrmsg ifa; @@ -566,17 +567,25 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af, if (rta->rta_type != IFA_ADDRESS) continue; - if (af == AF_INET) { + if (af == AF_INET && ifa->ifa_prefixlen > prefix_max) { memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta)); - *prefix_len = ifa->ifa_prefixlen; + + prefix_max = *prefix_len = ifa->ifa_prefixlen; } else if (af == AF_INET6 && addr && - ifa->ifa_scope == RT_SCOPE_UNIVERSE) { + ifa->ifa_scope == RT_SCOPE_UNIVERSE && + ifa->ifa_prefixlen > prefix_max) { memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta)); + + prefix_max = ifa->ifa_prefixlen; } if (addr_l && - af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK) + af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK && + ifa->ifa_prefixlen > prefix_max_ll) { memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta)); + + prefix_max_ll = ifa->ifa_prefixlen; + } } } return status; -- cgit v1.2.3