diff options
Diffstat (limited to 'netlink.c')
| -rw-r--r-- | netlink.c | 378 |
1 files changed, 356 insertions, 22 deletions
@@ -26,6 +26,7 @@
 #include <arpa/inet.h>
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
+#include <net/if_arp.h>

 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
@@ -35,11 +36,16 @@
 #include "log.h"
 #include "ip.h"
 #include "netlink.h"
+#include "epoll_ctl.h"

 /* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */
 #define RTNH_NEXT_AND_DEC(rtnh, attrlen)				\
 	((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))

+/* Convenience macro borrowed from kernel */
+#define NUD_VALID							\
+	(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE)
+
 /* Netlink expects a buffer of at least 8kiB or the system page size,
  * whichever is larger. 32kiB is recommended for more efficient.
  * Since the largest page size on any remotely common Linux setup is
@@ -50,9 +56,10 @@
 #define NLBUFSIZ 65536

 /* Socket in init, in target namespace, sequence (just needs to be monotonic) */
-int nl_sock	= -1;
-int nl_sock_ns	= -1;
-static int nl_seq = 1;
+int nl_sock		= -1;
+int nl_sock_ns		= -1;
+static int nl_sock_neigh = -1;
+static int nl_seq	= 1;

 /**
  * nl_sock_init_do() - Set up netlink sockets in init or target namespace
@@ -199,7 +206,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
 }

 /**
- * nl_foreach - 'for' type macro to step through netlink response messages
+ * nl_foreach() - 'for' type macro to step through netlink response messages
  * nl_foreach_oftype - as above, but only messages of expected type
  * @nh:		Steps through each response header (struct nlmsghdr *)
  * @status:	When loop exits indicates if there was an error (ssize_t)
@@ -297,6 +304,10 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
 		if (!thisifi)
 			continue; /* No interface for this route */

+		/* Skip 'lo': we should test IFF_LOOPBACK, but keep it simple */
+		if (thisifi == 1)
+			continue;
+
 		/* Skip routes to link-local addresses */
 		if (af == AF_INET && dst &&
 		    IN4_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
@@ -320,7 +331,7 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
 	}

 	if (status < 0)
-		warn("netlink: RTM_GETROUTE failed: %s", strerror(-status));
+		warn("netlink: RTM_GETROUTE failed: %s", strerror_(-status));

 	if (defifi) {
 		if (ndef > 1) {
@@ -351,9 +362,9 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
  *
  * Return: true if a gateway was found, false otherwise
  */
-bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
+static bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
 {
-	size_t nh_len = RTA_PAYLOAD(rta);
+	int nh_len = RTA_PAYLOAD(rta);
 	struct rtnexthop *rtnh;
 	bool found = false;
 	int hops = -1;
@@ -561,6 +572,11 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
 		if (nh->nlmsg_type != RTM_NEWROUTE)
 			continue;

+		/* nexthop state flags don't apply to freshly created routes,
+		 * and the kernel will refuse our route if they are set.
+		 */
+		rtm->rtm_flags &= ~RTNH_COMPARE_MASK;
+
 		for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
 		     rta = RTA_NEXT(rta, na)) {
 			/* RTA_OIF and RTA_MULTIPATH attributes carry the
@@ -582,7 +598,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,

 				*(unsigned int *)RTA_DATA(rta) = ifi_dst;
 			} else if (rta->rta_type == RTA_MULTIPATH) {
-				size_t nh_len = RTA_PAYLOAD(rta);
+				int nh_len = RTA_PAYLOAD(rta);
 				struct rtnexthop *rtnh;

 				for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
@@ -674,6 +690,63 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
 }

 /**
+ * nl_addr_set_ll_nodad() - Set IFA_F_NODAD on IPv6 link-local addresses
+ * @s:		Netlink socket
+ * @ifi:	Interface index in target namespace
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_set_ll_nodad(int s, unsigned int ifi)
+{
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifaddrmsg ifa;
+	} req = {
+		.ifa.ifa_family = AF_INET6,
+		.ifa.ifa_index = ifi,
+	};
+	uint32_t seq, last_seq = 0;
+	ssize_t status, ret = 0;
+	struct nlmsghdr *nh;
+	char buf[NLBUFSIZ];
+
+	seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+	nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+		struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+		struct rtattr *rta;
+		size_t na;
+
+		if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK)
+			continue;
+
+		ifa->ifa_flags |= IFA_F_NODAD;
+
+		for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+		     rta = RTA_NEXT(rta, na)) {
+			/* If 32-bit flags are used, add IFA_F_NODAD there */
+			if (rta->rta_type == IFA_FLAGS)
+				*(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
+		}
+
+		last_seq = nl_send(s, nh, RTM_NEWADDR, NLM_F_REPLACE,
+				   nh->nlmsg_len);
+	}
+
+	if (status < 0)
+		ret = status;
+
+	for (seq = seq + 1; seq <= last_seq; seq++) {
+		nl_foreach(nh, status, s, buf, seq)
+			warn("netlink: Unexpected response message");
+
+		if (!ret && status < 0)
+			ret = status;
+	}
+
+	return ret;
+}
+
+/**
  * nl_addr_get() - Get most specific global address, given interface and family
  * @s:		Netlink socket
  * @ifi:	Interface index in outer network namespace
@@ -682,7 +755,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
  * @prefix_len:	Mask or prefix length, to fill (for IPv4)
  * @addr_l:	Link-scoped address to fill (for IPv6)
  *
- * Return: 9 on success, negative error code on failure
+ * Return: 0 on success, negative error code on failure
  */
 int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 		void *addr, int *prefix_len, void *addr_l)
@@ -720,7 +793,7 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,

 				prefix_max = *prefix_len = ifa->ifa_prefixlen;
 			} else if (af == AF_INET6 && addr &&
-				   ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
+				   ifa->ifa_scope < RT_SCOPE_LINK &&
 				   ifa->ifa_prefixlen > prefix_max) {
 				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));

@@ -740,7 +813,54 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 }

 /**
- * nl_add_set() - Set IP addresses for given interface and address family
+ * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
+ * @s:		Netlink socket
+ * @ifi:	Interface index in outer network namespace
+ * @addr:	Link-local address to fill
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr)
+{
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifaddrmsg ifa;
+	} req = {
+		.ifa.ifa_family = AF_INET6,
+		.ifa.ifa_index = ifi,
+	};
+	struct nlmsghdr *nh;
+	bool found = false;
+	char buf[NLBUFSIZ];
+	ssize_t status;
+	uint32_t seq;
+
+	seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+	nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+		struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+		struct rtattr *rta;
+		size_t na;
+
+		if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK ||
+		    found)
+			continue;
+
+		for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+		     rta = RTA_NEXT(rta, na)) {
+			if (rta->rta_type != IFA_ADDRESS)
+				continue;
+
+			if (!found) {
+				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+				found = true;
+			}
+		}
+	}
+	return status;
+}
+
+/**
+ * nl_addr_set() - Set IP addresses for given interface and address family
  * @s:		Netlink socket
  * @ifi:	Interface index
  * @af:		Address family
@@ -916,7 +1036,6 @@ int nl_link_get_mac(int s, unsigned int ifi, void *mac)
 /**
  * nl_link_set_mac() - Set link MAC address
  * @s:		Netlink socket
- * @ns:		Use netlink socket in namespace
  * @ifi:	Interface index
  * @mac:	MAC address to set
  *
@@ -942,14 +1061,14 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac)
 }

 /**
- * nl_link_up() - Bring link up
+ * nl_link_set_mtu() - Set link MTU
  * @s:		Netlink socket
  * @ifi:	Interface index
- * @mtu:	If non-zero, set interface MTU
+ * @mtu:	Interface MTU
  *
  * Return: 0 on success, negative error code on failure
  */
-int nl_link_up(int s, unsigned int ifi, int mtu)
+int nl_link_set_mtu(int s, unsigned int ifi, int mtu)
 {
 	struct req_t {
 		struct nlmsghdr nlh;
@@ -959,17 +1078,232 @@ int nl_link_up(int s, unsigned int ifi, int mtu)
 	} req = {
 		.ifm.ifi_family = AF_UNSPEC,
 		.ifm.ifi_index = ifi,
-		.ifm.ifi_flags = IFF_UP,
-		.ifm.ifi_change = IFF_UP,
 		.rta.rta_type = IFLA_MTU,
 		.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
 		.mtu = mtu,
 	};
-	ssize_t len = sizeof(req);

-	if (!mtu)
-		/* Shorten request to drop MTU attribute */
-		len = offsetof(struct req_t, rta);
+	return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_link_set_flags() - Set link flags
+ * @s:		Netlink socket
+ * @ifi:	Interface index
+ * @set:	Device flags to set
+ * @change:	Mask of device flag changes
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_link_set_flags(int s, unsigned int ifi,
+		      unsigned int set, unsigned int change)
+{
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} req = {
+		.ifm.ifi_family = AF_UNSPEC,
+		.ifm.ifi_index = ifi,
+		.ifm.ifi_flags = set,
+		.ifm.ifi_change = change,
+	};
+
+	return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_neigh_msg_read() - Interpret a neighbour state message from netlink
+ * @c:		Execution context
+ * @nh:		Message to be read
+ */
+static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
+{
+	struct ndmsg *ndm = NLMSG_DATA(nh);
+	struct rtattr *rta = (struct rtattr *)(ndm + 1);
+	size_t na = NLMSG_PAYLOAD(nh, sizeof(*ndm));
+	char ip_str[INET6_ADDRSTRLEN];
+	char mac_str[ETH_ADDRSTRLEN];
+	const uint8_t *lladdr = NULL;
+	union inany_addr addr, daddr;
+	const void *dst = NULL;
+	size_t lladdr_len = 0;
+	size_t dstlen = 0;
+
+	if (nh->nlmsg_type == NLMSG_DONE)
+		return;
+
+	if (nh->nlmsg_type == NLMSG_ERROR) {
+		const struct nlmsgerr *errmsg = (struct nlmsgerr *)ndm;
+
+		warn("netlink error message on neighbour notifier: %s",
+		     strerror_(-errmsg->error));
+		return;
+	}
+
+	if (nh->nlmsg_type != RTM_NEWNEIGH && nh->nlmsg_type != RTM_DELNEIGH)
+		return;
+
+	for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+		if (rta->rta_type == NDA_DST) {
+			dst = RTA_DATA(rta);
+			dstlen = RTA_PAYLOAD(rta);
+		} else if (rta->rta_type == NDA_LLADDR) {
+			lladdr = RTA_DATA(rta);
+			lladdr_len = RTA_PAYLOAD(rta);
+		}
+	}
+
+	if (!dst)
+		return;
+
+	if (ndm->ndm_family == AF_INET && ndm->ndm_ifindex != c->ifi4)
+		return;
+
+	if (ndm->ndm_family == AF_INET6 && ndm->ndm_ifindex != c->ifi6)
+		return;
+
+	if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
+		return;
+
+	if (ndm->ndm_family == AF_INET && dstlen != sizeof(struct in_addr)) {
+		warn("netlink: wrong address length in AF_INET notification");
+		return;
+	}
+	if (ndm->ndm_family == AF_INET6 && dstlen != sizeof(struct in6_addr)) {
+		warn("netlink: wrong address length in AF_INET6 notification");
+		return;
+	}

-	return nl_do(s, &req, RTM_NEWLINK, 0, len);
+	/* We only handle guest-side visible addresses */
+	inany_from_af(&addr, ndm->ndm_family, dst);
+	if (!nat_inbound(c, &addr, &daddr))
+		return;
+
+	inany_ntop(&daddr, ip_str, sizeof(ip_str));
+
+	if (nh->nlmsg_type == RTM_DELNEIGH) {
+		trace("neighbour notifier delete: %s", ip_str);
+		fwd_neigh_table_free(c, &daddr);
+		return;
+	}
+	if (!(ndm->ndm_state & NUD_VALID)) {
+		trace("neighbour notifier: %s unreachable, state: 0x%04x",
+		      ip_str, ndm->ndm_state);
+		fwd_neigh_table_free(c, &daddr);
+		return;
+	}
+	if (!lladdr) {
+		warn("RTM_NEWNEIGH %s: missing link layer address", ip_str);
+		return;
+	}
+	if (lladdr_len != ETH_ALEN || ndm->ndm_type != ARPHRD_ETHER)
+		return;
+
+	eth_ntop(lladdr, mac_str, sizeof(mac_str));
+	trace("neighbour notifier update: %s / %s", ip_str, mac_str);
+	fwd_neigh_table_update(c, &daddr, lladdr, false);
+}
+
+/**
+ * nl_neigh_sync() - Read current contents of ARP/NDP tables
+ * @c:		Execution context
+ * @proto:	Protocol, AF_INET or AF_INET6
+ * @ifi:	Interface index
+ */
+static void nl_neigh_sync(const struct ctx *c, int proto, int ifi)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct ndmsg ndm;
+	} req = {
+		.ndm.ndm_family = proto,
+		.ndm.ndm_ifindex = ifi,
+	};
+	struct nlmsghdr *nh;
+	char buf[NLBUFSIZ];
+	ssize_t status;
+	uint32_t seq;
+
+	seq = nl_send(nl_sock_neigh, &req, RTM_GETNEIGH,
+		      NLM_F_DUMP, sizeof(req));
+	nl_foreach_oftype(nh, status, nl_sock_neigh, buf, seq, RTM_NEWNEIGH)
+		nl_neigh_msg_read(c, nh);
+	if (status < 0)
+		warn("netlink: RTM_GETNEIGH failed: %s", strerror_(-status));
+}
+
+/**
+ * nl_neigh_notify_handler() - Non-blocking drain of pending neighbour updates
+ * @c:		Execution context
+ */
+void nl_neigh_notify_handler(const struct ctx *c)
+{
+	char buf[NLBUFSIZ];
+
+	for (;;) {
+		ssize_t n = recv(nl_sock_neigh, buf, sizeof(buf), MSG_DONTWAIT);
+		struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			if (errno != EAGAIN)
+				warn_perror("netlink notifier read error");
+			return;
+		}
+		for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
+			nl_neigh_msg_read(c, nh);
+	}
+}
+
+/**
+ * nl_neigh_notify_init() - Subscribe to neighbour events
+ * @c:		Execution context
+ *
+ * Return: 0 on success, -1 on failure
+ */
+int nl_neigh_notify_init(const struct ctx *c)
+{
+	union epoll_ref ref = {
+		.type = EPOLL_TYPE_NL_NEIGH
+	};
+	struct epoll_event ev = {
+		.events = EPOLLIN
+	};
+	struct sockaddr_nl addr = {
+		.nl_family = AF_NETLINK,
+		.nl_groups = RTMGRP_NEIGH,
+	};
+
+	if (nl_sock_neigh >= 0) {
+		warn("netlink: neighbour notifier socket already exists");
+		return 0;
+	}
+
+	nl_sock_neigh = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+			       NETLINK_ROUTE);
+	if (nl_sock_neigh < 0) {
+		warn_perror("Failed to create neighbour notifier socket");
+		return -1;
+	}
+
+	if (bind(nl_sock_neigh, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		warn_perror("Failed to bind neighbour notifier socket");
+		close(nl_sock_neigh);
+		nl_sock_neigh = -1;
+		return -1;
+	}
+
+	ev.data.u64 = ref.u64;
+	if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_neigh, &ev) == -1) {
+		warn_perror("epoll_ctl() on neighbour notifier socket failed");
+		close(nl_sock_neigh);
+		nl_sock_neigh = -1;
+		return -1;
+	}
+
+	nl_neigh_sync(c, AF_INET, c->ifi4);
+	nl_neigh_sync(c, AF_INET6, c->ifi6);
+
+	return 0;
 }
