aboutgitcodebugslistschat
path: root/netlink.c
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2023-12-27 14:46:39 +0100
committerStefano Brivio <sbrivio@redhat.com>2023-12-30 11:45:27 +0100
commitf091893c1ffe1a531989a599737031089f6cfcb4 (patch)
tree2c01695133194a45d6b13570c451f4d2e9d22658 /netlink.c
parent62b94c3ec832f23b985db7d27b30052d2a83cf9a (diff)
downloadpasst-f091893c1ffe1a531989a599737031089f6cfcb4.tar
passt-f091893c1ffe1a531989a599737031089f6cfcb4.tar.gz
passt-f091893c1ffe1a531989a599737031089f6cfcb4.tar.bz2
passt-f091893c1ffe1a531989a599737031089f6cfcb4.tar.lz
passt-f091893c1ffe1a531989a599737031089f6cfcb4.tar.xz
passt-f091893c1ffe1a531989a599737031089f6cfcb4.tar.zst
passt-f091893c1ffe1a531989a599737031089f6cfcb4.zip
netlink: Fetch most specific (longest prefix) address in nl_addr_get()2023_12_30.f091893
This happened in most cases implicitly before commit eff3bcb24547 ("netlink: Split nl_addr() into separate operation functions"): while going through results from netlink, we would only copy an address into the provided return buffer if no address had been picked yet. Because of the insertion logic in the kernel (ipv6_link_dev_addr()), the first returned address would also be the one added last, and, in case of a Linux guest using a DHCPv6 client as well as SLAAC, that would be the address assigned via DHCPv6, because SLAAC happens before the DHCPv6 exchange. The effect of, instead, picking the last returned address (first assigned) is visible when passt or pasta runs nested, given that, by default, they advertise a prefix for SLAAC usage, plus an address via DHCPv6. The first level (L1 guest) would get a /64 address by means of SLAAC, and a /128 address via DHCPv6, the latter matching the address on the host. The second level (L2 guest) would also get two addresses: a /64 via SLAAC (same prefix as the host), and a /128 via DHCPv6, matching the the L1 SLAAC-assigned address, not the one obtained via DHCPv6. That is, none of the L2 addresses would match the address on the host. The whole point of having a DHCPv6 server is to avoid (implicit) NAT when possible, though. Fix this in a more explicit way than the behaviour we initially had: pick the first address among the set of most specific ones, by comparing prefix lengths. Do this for IPv4 and for link-local addresses, too, to match in any case the implementation of the default source address selection. Reported-by: Yalan Zhang <yalzhang@redhat.com> Fixes: eff3bcb24547 ("netlink: Split nl_addr() into separate operation functions") Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'netlink.c')
-rw-r--r--netlink.c19
1 files changed, 14 insertions, 5 deletions
diff --git a/netlink.c b/netlink.c
index 379d46e..bf79dd4 100644
--- a/netlink.c
+++ b/netlink.c
@@ -527,7 +527,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
}
/**
- * nl_addr_get() - Get IP address for given interface and address family
+ * nl_addr_get() - Get most specific global address, given interface and family
* @s: Netlink socket
* @ifi: Interface index in outer network namespace
* @af: Address family
@@ -540,6 +540,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
void *addr, int *prefix_len, void *addr_l)
{
+ uint8_t prefix_max = 0, prefix_max_ll = 0;
struct req_t {
struct nlmsghdr nlh;
struct ifaddrmsg ifa;
@@ -566,17 +567,25 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
if (rta->rta_type != IFA_ADDRESS)
continue;
- if (af == AF_INET) {
+ if (af == AF_INET && ifa->ifa_prefixlen > prefix_max) {
memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
- *prefix_len = ifa->ifa_prefixlen;
+
+ prefix_max = *prefix_len = ifa->ifa_prefixlen;
} else if (af == AF_INET6 && addr &&
- ifa->ifa_scope == RT_SCOPE_UNIVERSE) {
+ ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
+ ifa->ifa_prefixlen > prefix_max) {
memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+
+ prefix_max = ifa->ifa_prefixlen;
}
if (addr_l &&
- af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK)
+ af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK &&
+ ifa->ifa_prefixlen > prefix_max_ll) {
memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta));
+
+ prefix_max_ll = ifa->ifa_prefixlen;
+ }
}
}
return status;