diff options
Diffstat (limited to 'netlink.c')
-rw-r--r-- | netlink.c | 324 |
1 files changed, 278 insertions, 46 deletions
@@ -33,8 +33,13 @@ #include "util.h" #include "passt.h" #include "log.h" +#include "ip.h" #include "netlink.h" +/* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */ +#define RTNH_NEXT_AND_DEC(rtnh, attrlen) \ + ((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh)) + /* Netlink expects a buffer of at least 8kiB or the system page size, * whichever is larger. 32kiB is recommended for more efficient. * Since the largest page size on any remotely common Linux setup is @@ -128,7 +133,7 @@ static uint32_t nl_send(int s, void *req, uint16_t type, n = send(s, req, len, 0); if (n < 0) - die("netlink: Failed to send(): %s", strerror(errno)); + die_perror("netlink: Failed to send()"); else if (n < len) die("netlink: Short send (%zd of %zd bytes)", n, len); @@ -184,7 +189,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t * *n = recv(s, buf, NLBUFSIZ, 0); if (*n < 0) - die("netlink: Failed to recv(): %s", strerror(errno)); + die_perror("netlink: Failed to recv()"); nh = (struct nlmsghdr *)buf; if (!NLMSG_OK(nh, *n)) @@ -254,7 +259,8 @@ unsigned int nl_get_ext_if(int s, sa_family_t af) .rtm.rtm_type = RTN_UNICAST, .rtm.rtm_family = af, }; - unsigned int ifi = 0; + unsigned defifi = 0, anyifi = 0; + unsigned ndef = 0, nany = 0; struct nlmsghdr *nh; struct rtattr *rta; char buf[NLBUFSIZ]; @@ -262,30 +268,80 @@ unsigned int nl_get_ext_if(int s, sa_family_t af) uint32_t seq; size_t na; + /* Look for an interface with a default route first, failing that, look + * for any interface with a route, and pick the first one, if any. + */ seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req)); nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWROUTE) { struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh); + const void *dst = NULL; + unsigned thisifi = 0; - if (ifi || rtm->rtm_dst_len || rtm->rtm_family != af) + if (rtm->rtm_family != af) continue; for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { if (rta->rta_type == RTA_OIF) { - ifi = *(unsigned int *)RTA_DATA(rta); + thisifi = *(unsigned int *)RTA_DATA(rta); } else if (rta->rta_type == RTA_MULTIPATH) { const struct rtnexthop *rtnh; rtnh = (struct rtnexthop *)RTA_DATA(rta); - ifi = rtnh->rtnh_ifindex; + thisifi = rtnh->rtnh_ifindex; + } else if (rta->rta_type == RTA_DST) { + dst = RTA_DATA(rta); } } + + if (!thisifi) + continue; /* No interface for this route */ + + /* Skip routes to link-local addresses */ + if (af == AF_INET && dst && + IN4_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len)) + continue; + + if (af == AF_INET6 && dst && + IN6_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len)) + continue; + + if (rtm->rtm_dst_len == 0) { + /* Default route */ + ndef++; + if (!defifi) + defifi = thisifi; + } else { + /* Non-default route */ + nany++; + if (!anyifi) + anyifi = thisifi; + } } if (status < 0) warn("netlink: RTM_GETROUTE failed: %s", strerror(-status)); - return ifi; + if (defifi) { + if (ndef > 1) { + info("Multiple default %s routes, picked first", + af_name(af)); + } + return defifi; + } + + if (anyifi) { + if (nany > 1) { + info("Multiple interfaces with %s routes, picked first", + af_name(af)); + } + return anyifi; + } + + if (!nany) + info("No interfaces with usable %s routes", af_name(af)); + + return 0; } /** @@ -297,12 +353,13 @@ unsigned int nl_get_ext_if(int s, sa_family_t af) */ bool nl_route_get_def_multipath(struct rtattr *rta, void *gw) { + int nh_len = RTA_PAYLOAD(rta); struct rtnexthop *rtnh; bool found = false; int hops = -1; for (rtnh = (struct rtnexthop *)RTA_DATA(rta); - RTNH_OK(rtnh, RTA_PAYLOAD(rta)); rtnh = RTNH_NEXT(rtnh)) { + RTNH_OK(rtnh, nh_len); rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) { size_t len = rtnh->rtnh_len - sizeof(*rtnh); struct rtattr *rta_inner; @@ -332,7 +389,7 @@ bool nl_route_get_def_multipath(struct rtattr *rta, void *gw) * @af: Address family * @gw: Default gateway to fill on NL_GET * - * Return: 0 on success, negative error code on failure + * Return: error on netlink failure, or 0 (gw unset if default route not found) */ int nl_route_get_def(int s, unsigned int ifi, sa_family_t af, void *gw) { @@ -479,7 +536,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src, .rta.rta_len = RTA_LENGTH(sizeof(unsigned int)), .ifi = ifi_src, }; - ssize_t nlmsgs_size, status; + ssize_t nlmsgs_size, left, status; unsigned dup_routes = 0; struct nlmsghdr *nh; char buf[NLBUFSIZ]; @@ -493,39 +550,83 @@ int nl_route_dup(int s_src, unsigned int ifi_src, * routes in the buffer at once. */ nh = nl_next(s_src, buf, NULL, &nlmsgs_size); - for (status = nlmsgs_size; - NLMSG_OK(nh, status) && (status = nl_status(nh, status, seq)) > 0; - nh = NLMSG_NEXT(nh, status)) { + for (left = nlmsgs_size; + NLMSG_OK(nh, left) && (status = nl_status(nh, left, seq)) > 0; + nh = NLMSG_NEXT(nh, left)) { struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh); + bool discard = false; struct rtattr *rta; size_t na; if (nh->nlmsg_type != RTM_NEWROUTE) continue; - dup_routes++; - for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + /* RTA_OIF and RTA_MULTIPATH attributes carry the + * identifier of a host interface. If they match the + * host interface we're copying from, change them to + * match the corresponding identifier in the target + * namespace. + * + * If RTA_OIF doesn't match (NETLINK_GET_STRICT_CHK not + * available), or if any interface index in nexthop + * objects differ from the host interface, discard the + * route altogether. + */ if (rta->rta_type == RTA_OIF) { - /* The host obviously list's the host interface - * id here, we need to change it to the - * namespace's interface id - */ + if (*(unsigned int *)RTA_DATA(rta) != ifi_src) { + discard = true; + break; + } + *(unsigned int *)RTA_DATA(rta) = ifi_dst; - } else if (rta->rta_type == RTA_PREFSRC) { - /* Host routes might include a preferred source - * address, which must be one of the host's - * addresses. However, with -a pasta will use a - * different namespace address, making such a - * route invalid in the namespace. Strip off - * RTA_PREFSRC attributes to avoid that. */ + } else if (rta->rta_type == RTA_MULTIPATH) { + int nh_len = RTA_PAYLOAD(rta); + struct rtnexthop *rtnh; + + for (rtnh = (struct rtnexthop *)RTA_DATA(rta); + RTNH_OK(rtnh, nh_len); + rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) { + int src = (int)ifi_src; + + if (rtnh->rtnh_ifindex != src) { + discard = true; + break; + } + + rtnh->rtnh_ifindex = ifi_dst; + } + + if (discard) + break; + } else if (rta->rta_type == RTA_PREFSRC || + rta->rta_type == RTA_NH_ID) { + /* Strip RTA_PREFSRC attributes: host routes + * might include a preferred source address, + * which must be one of the host's addresses. + * However, with -a, pasta will use a different + * namespace address, making such a route + * invalid in the namespace. + * + * Strip RTA_NH_ID attributes: host routes set + * up via routing protocols (e.g. OSPF) might + * contain a nexthop ID (and not nexthop + * objects, which are taken care of in the + * RTA_MULTIPATH case above) that's not valid + * in the target namespace. + */ rta->rta_type = RTA_UNSPEC; } } + + if (discard) + nh->nlmsg_type = NLMSG_NOOP; + else + dup_routes++; } - if (!NLMSG_OK(nh, status) || status > 0) { + if (!NLMSG_OK(nh, left)) { /* Process any remaining datagrams in a different * buffer so we don't overwrite the first one. */ @@ -551,9 +652,9 @@ int nl_route_dup(int s_src, unsigned int ifi_src, * to calculate dependencies: let the kernel do that. */ for (i = 0; i < dup_routes; i++) { - for (nh = (struct nlmsghdr *)buf, status = nlmsgs_size; - NLMSG_OK(nh, status); - nh = NLMSG_NEXT(nh, status)) { + for (nh = (struct nlmsghdr *)buf, left = nlmsgs_size; + NLMSG_OK(nh, left); + nh = NLMSG_NEXT(nh, left)) { uint16_t flags = nh->nlmsg_flags; int rc; @@ -563,7 +664,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src, rc = nl_do(s_dst, nh, RTM_NEWROUTE, (flags & ~NLM_F_DUMP_FILTERED) | NLM_F_CREATE, nh->nlmsg_len); - if (rc < 0 && rc != -ENETUNREACH && rc != -EEXIST) + if (rc < 0 && rc != -EEXIST && + rc != -ENETUNREACH && rc != -EHOSTUNREACH) return rc; } } @@ -572,6 +674,63 @@ int nl_route_dup(int s_src, unsigned int ifi_src, } /** + * nl_addr_set_ll_nodad() - Set IFA_F_NODAD on IPv6 link-local addresses + * @s: Netlink socket + * @ifi: Interface index in target namespace + * + * Return: 0 on success, negative error code on failure + */ +int nl_addr_set_ll_nodad(int s, unsigned int ifi) +{ + struct req_t { + struct nlmsghdr nlh; + struct ifaddrmsg ifa; + } req = { + .ifa.ifa_family = AF_INET6, + .ifa.ifa_index = ifi, + }; + uint32_t seq, last_seq = 0; + ssize_t status, ret = 0; + struct nlmsghdr *nh; + char buf[NLBUFSIZ]; + + seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req)); + nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) { + struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh); + struct rtattr *rta; + size_t na; + + if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK) + continue; + + ifa->ifa_flags |= IFA_F_NODAD; + + for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na); + rta = RTA_NEXT(rta, na)) { + /* If 32-bit flags are used, add IFA_F_NODAD there */ + if (rta->rta_type == IFA_FLAGS) + *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD; + } + + last_seq = nl_send(s, nh, RTM_NEWADDR, NLM_F_REPLACE, + nh->nlmsg_len); + } + + if (status < 0) + ret = status; + + for (seq = seq + 1; seq <= last_seq; seq++) { + nl_foreach(nh, status, s, buf, seq) + warn("netlink: Unexpected response message"); + + if (!ret && status < 0) + ret = status; + } + + return ret; +} + +/** * nl_addr_get() - Get most specific global address, given interface and family * @s: Netlink socket * @ifi: Interface index in outer network namespace @@ -580,7 +739,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src, * @prefix_len: Mask or prefix length, to fill (for IPv4) * @addr_l: Link-scoped address to fill (for IPv6) * - * Return: 9 on success, negative error code on failure + * Return: 0 on success, negative error code on failure */ int nl_addr_get(int s, unsigned int ifi, sa_family_t af, void *addr, int *prefix_len, void *addr_l) @@ -604,12 +763,13 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af, struct rtattr *rta; size_t na; - if (ifa->ifa_index != ifi) + if (ifa->ifa_index != ifi || ifa->ifa_flags & IFA_F_DEPRECATED) continue; for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { - if (rta->rta_type != IFA_ADDRESS) + if ((af == AF_INET && rta->rta_type != IFA_LOCAL) || + (af == AF_INET6 && rta->rta_type != IFA_ADDRESS)) continue; if (af == AF_INET && ifa->ifa_prefixlen > prefix_max) { @@ -637,7 +797,54 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af, } /** - * nl_add_set() - Set IP addresses for given interface and address family + * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface + * @s: Netlink socket + * @ifi: Interface index in outer network namespace + * @addr: Link-local address to fill + * + * Return: 0 on success, negative error code on failure + */ +int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr) +{ + struct req_t { + struct nlmsghdr nlh; + struct ifaddrmsg ifa; + } req = { + .ifa.ifa_family = AF_INET6, + .ifa.ifa_index = ifi, + }; + struct nlmsghdr *nh; + bool found = false; + char buf[NLBUFSIZ]; + ssize_t status; + uint32_t seq; + + seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req)); + nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) { + struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh); + struct rtattr *rta; + size_t na; + + if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK || + found) + continue; + + for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na); + rta = RTA_NEXT(rta, na)) { + if (rta->rta_type != IFA_ADDRESS) + continue; + + if (!found) { + memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta)); + found = true; + } + } + } + return status; +} + +/** + * nl_addr_set() - Set IP addresses for given interface and address family * @s: Netlink socket * @ifi: Interface index * @af: Address family @@ -740,10 +947,13 @@ int nl_addr_dup(int s_src, unsigned int ifi_src, ifa = (struct ifaddrmsg *)NLMSG_DATA(nh); if (rc < 0 || ifa->ifa_scope == RT_SCOPE_LINK || - ifa->ifa_index != ifi_src) + ifa->ifa_index != ifi_src || + ifa->ifa_flags & IFA_F_DEPRECATED) continue; ifa->ifa_index = ifi_dst; + /* Same as nl_addr_set(), but here it's more than a default */ + ifa->ifa_flags |= IFA_F_NODAD; for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { @@ -751,6 +961,10 @@ int nl_addr_dup(int s_src, unsigned int ifi_src, if (rta->rta_type == IFA_LABEL || rta->rta_type == IFA_CACHEINFO) rta->rta_type = IFA_UNSPEC; + + /* If 32-bit flags are used, add IFA_F_NODAD there */ + if (rta->rta_type == IFA_FLAGS) + *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD; } rc = nl_do(s_dst, nh, RTM_NEWADDR, @@ -832,14 +1046,14 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac) } /** - * nl_link_up() - Bring link up + * nl_link_set_mtu() - Set link MTU * @s: Netlink socket * @ifi: Interface index - * @mtu: If non-zero, set interface MTU + * @mtu: Interface MTU * * Return: 0 on success, negative error code on failure */ -int nl_link_up(int s, unsigned int ifi, int mtu) +int nl_link_set_mtu(int s, unsigned int ifi, int mtu) { struct req_t { struct nlmsghdr nlh; @@ -849,17 +1063,35 @@ int nl_link_up(int s, unsigned int ifi, int mtu) } req = { .ifm.ifi_family = AF_UNSPEC, .ifm.ifi_index = ifi, - .ifm.ifi_flags = IFF_UP, - .ifm.ifi_change = IFF_UP, .rta.rta_type = IFLA_MTU, .rta.rta_len = RTA_LENGTH(sizeof(unsigned int)), .mtu = mtu, }; - ssize_t len = sizeof(req); - if (!mtu) - /* Shorten request to drop MTU attribute */ - len = offsetof(struct req_t, rta); + return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req)); +} + +/** + * nl_link_set_flags() - Set link flags + * @s: Netlink socket + * @ifi: Interface index + * @set: Device flags to set + * @change: Mask of device flag changes + * + * Return: 0 on success, negative error code on failure + */ +int nl_link_set_flags(int s, unsigned int ifi, + unsigned int set, unsigned int change) +{ + struct req_t { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .ifm.ifi_family = AF_UNSPEC, + .ifm.ifi_index = ifi, + .ifm.ifi_flags = set, + .ifm.ifi_change = change, + }; - return nl_do(s, &req, RTM_NEWLINK, 0, len); + return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req)); } |