aboutgitcodebugslistschat
path: root/netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'netlink.c')
-rw-r--r--netlink.c503
1 files changed, 454 insertions, 49 deletions
diff --git a/netlink.c b/netlink.c
index 9b3dba2..82a2f0c 100644
--- a/netlink.c
+++ b/netlink.c
@@ -26,6 +26,7 @@
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
+#include <net/if_arp.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
@@ -33,7 +34,17 @@
#include "util.h"
#include "passt.h"
#include "log.h"
+#include "ip.h"
#include "netlink.h"
+#include "epoll_ctl.h"
+
+/* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen'
+ *
+ * Subtracts the aligned length of the current nexthop from @attrlen first,
+ * then evaluates to the following nexthop. @rtnh is expanded more than once,
+ * so it must not have side effects.
+ */
+#define RTNH_NEXT_AND_DEC(rtnh, attrlen) \
+ ((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
+
+/* Convenience macro borrowed from kernel: mask of neighbour states for which
+ * nl_neigh_msg_read() keeps an entry (any other state drops the entry)
+ */
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE)
/* Netlink expects a buffer of at least 8kiB or the system page size,
* whichever is larger. 32kiB is recommended for greater efficiency.
@@ -45,9 +56,10 @@
#define NLBUFSIZ 65536
/* Socket in init, in target namespace, sequence (just needs to be monotonic) */
-int nl_sock = -1;
-int nl_sock_ns = -1;
-static int nl_seq = 1;
+int nl_sock = -1;
+int nl_sock_ns = -1;
+static int nl_sock_neigh = -1; /* Neighbour notifier socket, -1 if not set up */
+static int nl_seq = 1;
/**
* nl_sock_init_do() - Set up netlink sockets in init or target namespace
@@ -128,7 +140,7 @@ static uint32_t nl_send(int s, void *req, uint16_t type,
n = send(s, req, len, 0);
if (n < 0)
- die("netlink: Failed to send(): %s", strerror(errno));
+ die_perror("netlink: Failed to send()");
else if (n < len)
die("netlink: Short send (%zd of %zd bytes)", n, len);
@@ -184,7 +196,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
*n = recv(s, buf, NLBUFSIZ, 0);
if (*n < 0)
- die("netlink: Failed to recv(): %s", strerror(errno));
+ die_perror("netlink: Failed to recv()");
nh = (struct nlmsghdr *)buf;
if (!NLMSG_OK(nh, *n))
@@ -194,7 +206,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
}
/**
- * nl_foreach - 'for' type macro to step through netlink response messages
+ * nl_foreach() - 'for' type macro to step through netlink response messages
* nl_foreach_oftype - as above, but only messages of expected type
* @nh: Steps through each response header (struct nlmsghdr *)
* @status: When loop exits indicates if there was an error (ssize_t)
@@ -264,12 +276,12 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
size_t na;
/* Look for an interface with a default route first, failing that, look
- * for any interface with a route, and pick it only if it's the only
- * interface with a route.
+ * for any interface with a route, and pick the first one, if any.
*/
seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWROUTE) {
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ const void *dst = NULL;
unsigned thisifi = 0;
if (rtm->rtm_family != af)
@@ -284,12 +296,27 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
rtnh = (struct rtnexthop *)RTA_DATA(rta);
thisifi = rtnh->rtnh_ifindex;
+ } else if (rta->rta_type == RTA_DST) {
+ dst = RTA_DATA(rta);
}
}
if (!thisifi)
continue; /* No interface for this route */
+ /* Skip 'lo': we should test IFF_LOOPBACK, but keep it simple */
+ if (thisifi == 1)
+ continue;
+
+ /* Skip routes to link-local addresses */
+ if (af == AF_INET && dst &&
+ IN4_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
+ continue;
+
+ if (af == AF_INET6 && dst &&
+ IN6_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
+ continue;
+
if (rtm->rtm_dst_len == 0) {
/* Default route */
ndef++;
@@ -304,25 +331,26 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
}
if (status < 0)
- warn("netlink: RTM_GETROUTE failed: %s", strerror(-status));
+ warn("netlink: RTM_GETROUTE failed: %s", strerror_(-status));
if (defifi) {
- if (ndef > 1)
+ if (ndef > 1) {
info("Multiple default %s routes, picked first",
- af == AF_INET ? "IPv4" : "IPv6");
+ af_name(af));
+ }
return defifi;
}
if (anyifi) {
- if (nany == 1)
- return anyifi;
-
- info("Multiple interfaces with %s routes, use -i to select one",
- af == AF_INET ? "IPv4" : "IPv6");
+ if (nany > 1) {
+ info("Multiple interfaces with %s routes, picked first",
+ af_name(af));
+ }
+ return anyifi;
}
if (!nany)
- info("No interfaces with %s routes", af == AF_INET ? "IPv4" : "IPv6");
+ info("No interfaces with usable %s routes", af_name(af));
return 0;
}
@@ -334,14 +362,15 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
*
* Return: true if a gateway was found, false otherwise
*/
-bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
+static bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
{
+ int nh_len = RTA_PAYLOAD(rta);
struct rtnexthop *rtnh;
bool found = false;
int hops = -1;
for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
- RTNH_OK(rtnh, RTA_PAYLOAD(rta)); rtnh = RTNH_NEXT(rtnh)) {
+ RTNH_OK(rtnh, nh_len); rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) {
size_t len = rtnh->rtnh_len - sizeof(*rtnh);
struct rtattr *rta_inner;
@@ -536,32 +565,81 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
NLMSG_OK(nh, left) && (status = nl_status(nh, left, seq)) > 0;
nh = NLMSG_NEXT(nh, left)) {
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ bool discard = false;
struct rtattr *rta;
size_t na;
if (nh->nlmsg_type != RTM_NEWROUTE)
continue;
- dup_routes++;
+ /* nexthop state flags don't apply to freshly created routes,
+ * and the kernel will refuse our route if they are set.
+ */
+ rtm->rtm_flags &= ~RTNH_COMPARE_MASK;
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
+ /* RTA_OIF and RTA_MULTIPATH attributes carry the
+ * identifier of a host interface. If they match the
+ * host interface we're copying from, change them to
+ * match the corresponding identifier in the target
+ * namespace.
+ *
+ * If RTA_OIF doesn't match (NETLINK_GET_STRICT_CHK not
+ * available), or if any interface index in nexthop
+ * objects differ from the host interface, discard the
+ * route altogether.
+ */
if (rta->rta_type == RTA_OIF) {
- /* The host obviously list's the host interface
- * id here, we need to change it to the
- * namespace's interface id
- */
+ if (*(unsigned int *)RTA_DATA(rta) != ifi_src) {
+ discard = true;
+ break;
+ }
+
*(unsigned int *)RTA_DATA(rta) = ifi_dst;
- } else if (rta->rta_type == RTA_PREFSRC) {
- /* Host routes might include a preferred source
- * address, which must be one of the host's
- * addresses. However, with -a pasta will use a
- * different namespace address, making such a
- * route invalid in the namespace. Strip off
- * RTA_PREFSRC attributes to avoid that. */
+ } else if (rta->rta_type == RTA_MULTIPATH) {
+ int nh_len = RTA_PAYLOAD(rta);
+ struct rtnexthop *rtnh;
+
+ for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
+ RTNH_OK(rtnh, nh_len);
+ rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) {
+ int src = (int)ifi_src;
+
+ if (rtnh->rtnh_ifindex != src) {
+ discard = true;
+ break;
+ }
+
+ rtnh->rtnh_ifindex = ifi_dst;
+ }
+
+ if (discard)
+ break;
+ } else if (rta->rta_type == RTA_PREFSRC ||
+ rta->rta_type == RTA_NH_ID) {
+ /* Strip RTA_PREFSRC attributes: host routes
+ * might include a preferred source address,
+ * which must be one of the host's addresses.
+ * However, with -a, pasta will use a different
+ * namespace address, making such a route
+ * invalid in the namespace.
+ *
+ * Strip RTA_NH_ID attributes: host routes set
+ * up via routing protocols (e.g. OSPF) might
+ * contain a nexthop ID (and not nexthop
+ * objects, which are taken care of in the
+ * RTA_MULTIPATH case above) that's not valid
+ * in the target namespace.
+ */
rta->rta_type = RTA_UNSPEC;
}
}
+
+ if (discard)
+ nh->nlmsg_type = NLMSG_NOOP;
+ else
+ dup_routes++;
}
if (!NLMSG_OK(nh, left)) {
@@ -602,7 +680,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
rc = nl_do(s_dst, nh, RTM_NEWROUTE,
(flags & ~NLM_F_DUMP_FILTERED) | NLM_F_CREATE,
nh->nlmsg_len);
- if (rc < 0 && rc != -ENETUNREACH && rc != -EEXIST)
+ if (rc < 0 && rc != -EEXIST &&
+ rc != -ENETUNREACH && rc != -EHOSTUNREACH)
return rc;
}
}
@@ -611,6 +690,63 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
}
/**
+ * nl_addr_set_ll_nodad() - Set IFA_F_NODAD on IPv6 link-local addresses
+ * @s: Netlink socket
+ * @ifi: Interface index in target namespace
+ *
+ * Dumps AF_INET6 addresses with RTM_GETADDR and, for each link-scoped address
+ * on @ifi, sends the same message back as RTM_NEWADDR with NLM_F_REPLACE after
+ * setting IFA_F_NODAD both in the header flags and in any IFA_FLAGS attribute.
+ * Responses to those requests are read only once the dump loop is over.
+ *
+ * NOTE(review): the response loop walks sequence numbers from the one after
+ * the dump request up to the last one returned by nl_send(), so it presumably
+ * relies on nl_send() handing out consecutive sequence numbers -- confirm.
+ *
+ * Return: 0 on success, negative error code on failure. An error from the dump
+ * takes precedence, otherwise the first error seen while reading responses to
+ * the replace requests is returned.
+ */
+int nl_addr_set_ll_nodad(int s, unsigned int ifi)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ uint32_t seq, last_seq = 0; /* last_seq stays 0 if nothing is sent */
+ ssize_t status, ret = 0;
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK)
+ continue; /* Not a link-scoped address on @ifi */
+
+ ifa->ifa_flags |= IFA_F_NODAD;
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
+ }
+
+ last_seq = nl_send(s, nh, RTM_NEWADDR, NLM_F_REPLACE,
+ nh->nlmsg_len);
+ }
+
+ if (status < 0)
+ ret = status;
+
+ for (seq = seq + 1; seq <= last_seq; seq++) {
+ nl_foreach(nh, status, s, buf, seq)
+ warn("netlink: Unexpected response message");
+
+ if (!ret && status < 0)
+ ret = status; /* Keep the first error only */
+ }
+
+ return ret;
+}
+
+/**
* nl_addr_get() - Get most specific global address, given interface and family
* @s: Netlink socket
* @ifi: Interface index in outer network namespace
@@ -619,7 +755,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
* @prefix_len: Mask or prefix length, to fill (for IPv4)
* @addr_l: Link-scoped address to fill (for IPv6)
*
- * Return: 9 on success, negative error code on failure
+ * Return: 0 on success, negative error code on failure
*/
int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
void *addr, int *prefix_len, void *addr_l)
@@ -643,12 +779,13 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
struct rtattr *rta;
size_t na;
- if (ifa->ifa_index != ifi)
+ if (ifa->ifa_index != ifi || ifa->ifa_flags & IFA_F_DEPRECATED)
continue;
for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFA_ADDRESS)
+ if ((af == AF_INET && rta->rta_type != IFA_LOCAL) ||
+ (af == AF_INET6 && rta->rta_type != IFA_ADDRESS))
continue;
if (af == AF_INET && ifa->ifa_prefixlen > prefix_max) {
@@ -656,7 +793,7 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
prefix_max = *prefix_len = ifa->ifa_prefixlen;
} else if (af == AF_INET6 && addr &&
- ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
+ ifa->ifa_scope < RT_SCOPE_LINK &&
ifa->ifa_prefixlen > prefix_max) {
memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
@@ -676,7 +813,54 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
}
/**
- * nl_add_set() - Set IP addresses for given interface and address family
+ * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
+ * @s: Netlink socket
+ * @ifi: Interface index in outer network namespace
+ * @addr: Link-local address to fill
+ *
+ * Dumps AF_INET6 addresses with RTM_GETADDR and copies the IFA_ADDRESS
+ * attribute of the first link-scoped address found on @ifi into @addr. Later
+ * matches are skipped, but the dump is still read to the end.
+ *
+ * Return: 0 on success, negative error code on failure
+ *
+ * NOTE(review): the return value is the status of the dump only. If no
+ * link-local address is found, @addr is left untouched and the dump status is
+ * still returned, so callers can't tell that case apart from the return value.
+ * The attribute payload length is also not checked against sizeof(*addr)
+ * before memcpy() -- confirm both are intended.
+ */
+int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ struct nlmsghdr *nh;
+ bool found = false; /* Set once @addr has been filled */
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK ||
+ found)
+ continue; /* Wrong interface or scope, or already done */
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != IFA_ADDRESS)
+ continue;
+
+ if (!found) {
+ memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ found = true;
+ }
+ }
+ }
+ return status;
+}
+
+/**
+ * nl_addr_set() - Set IP addresses for given interface and address family
* @s: Netlink socket
* @ifi: Interface index
* @af: Address family
@@ -779,10 +963,13 @@ int nl_addr_dup(int s_src, unsigned int ifi_src,
ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
if (rc < 0 || ifa->ifa_scope == RT_SCOPE_LINK ||
- ifa->ifa_index != ifi_src)
+ ifa->ifa_index != ifi_src ||
+ ifa->ifa_flags & IFA_F_DEPRECATED)
continue;
ifa->ifa_index = ifi_dst;
+ /* Same as nl_addr_set(), but here it's more than a default */
+ ifa->ifa_flags |= IFA_F_NODAD;
for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
@@ -790,6 +977,10 @@ int nl_addr_dup(int s_src, unsigned int ifi_src,
if (rta->rta_type == IFA_LABEL ||
rta->rta_type == IFA_CACHEINFO)
rta->rta_type = IFA_UNSPEC;
+
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
}
rc = nl_do(s_dst, nh, RTM_NEWADDR,
@@ -845,7 +1036,6 @@ int nl_link_get_mac(int s, unsigned int ifi, void *mac)
/**
* nl_link_set_mac() - Set link MAC address
* @s: Netlink socket
- * @ns: Use netlink socket in namespace
* @ifi: Interface index
* @mac: MAC address to set
*
@@ -871,14 +1061,14 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac)
}
/**
- * nl_link_up() - Bring link up
+ * nl_link_set_mtu() - Set link MTU
* @s: Netlink socket
* @ifi: Interface index
- * @mtu: If non-zero, set interface MTU
+ * @mtu: Interface MTU
*
* Return: 0 on success, negative error code on failure
*/
-int nl_link_up(int s, unsigned int ifi, int mtu)
+int nl_link_set_mtu(int s, unsigned int ifi, int mtu)
{
struct req_t {
struct nlmsghdr nlh;
@@ -888,17 +1078,232 @@ int nl_link_up(int s, unsigned int ifi, int mtu)
} req = {
.ifm.ifi_family = AF_UNSPEC,
.ifm.ifi_index = ifi,
- .ifm.ifi_flags = IFF_UP,
- .ifm.ifi_change = IFF_UP,
.rta.rta_type = IFLA_MTU,
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
.mtu = mtu,
};
- ssize_t len = sizeof(req);
- if (!mtu)
- /* Shorten request to drop MTU attribute */
- len = offsetof(struct req_t, rta);
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_link_set_flags() - Set link flags
+ * @s: Netlink socket
+ * @ifi: Interface index
+ * @set: Device flags to set
+ * @change: Mask of device flag changes
+ *
+ * Sends an RTM_NEWLINK request for @ifi carrying no attributes, with @set as
+ * ifi_flags and @change as ifi_change, and waits for the outcome via nl_do().
+ * Presumably only the flag bits selected by @change are affected -- this is
+ * decided by the kernel, not by this function.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_link_set_flags(int s, unsigned int ifi,
+ unsigned int set, unsigned int change)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .ifm.ifi_family = AF_UNSPEC,
+ .ifm.ifi_index = ifi,
+ .ifm.ifi_flags = set,
+ .ifm.ifi_change = change,
+ };
+
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_neigh_msg_read() - Interpret a neighbour state message from netlink
+ * @c: Execution context
+ * @nh: Message to be read
+ *
+ * NLMSG_DONE is ignored, NLMSG_ERROR is logged as a warning, and any type
+ * other than RTM_NEWNEIGH or RTM_DELNEIGH is ignored. For neighbour messages,
+ * the NDA_DST and NDA_LLADDR attributes are collected, then the message is
+ * dropped unless it's AF_INET on interface c->ifi4 or AF_INET6 on interface
+ * c->ifi6, with a destination of the matching address size.
+ *
+ * The destination is then passed through nat_inbound(): if that fails, the
+ * message is ignored, otherwise the address it returns is the one used for
+ * the neighbour table. RTM_DELNEIGH, and RTM_NEWNEIGH with a state outside
+ * NUD_VALID, free the table entry. Otherwise, the entry is updated, but only
+ * if a link-layer address of ETH_ALEN bytes is present and the entry type is
+ * ARPHRD_ETHER.
+ */
+static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nh);
+ struct rtattr *rta = (struct rtattr *)(ndm + 1); /* Attributes follow ndmsg */
+ size_t na = NLMSG_PAYLOAD(nh, sizeof(*ndm));
+ char ip_str[INET6_ADDRSTRLEN];
+ char mac_str[ETH_ADDRSTRLEN];
+ const uint8_t *lladdr = NULL;
+ union inany_addr addr, daddr;
+ const void *dst = NULL;
+ size_t lladdr_len = 0;
+ size_t dstlen = 0;
+
+ if (nh->nlmsg_type == NLMSG_DONE)
+ return;
+
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *errmsg = (struct nlmsgerr *)ndm;
+
+ warn("netlink error message on neighbour notifier: %s",
+ strerror_(-errmsg->error));
+ return;
+ }
+
+ if (nh->nlmsg_type != RTM_NEWNEIGH && nh->nlmsg_type != RTM_DELNEIGH)
+ return;
+
+ for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type == NDA_DST) {
+ dst = RTA_DATA(rta);
+ dstlen = RTA_PAYLOAD(rta);
+ } else if (rta->rta_type == NDA_LLADDR) {
+ lladdr = RTA_DATA(rta);
+ lladdr_len = RTA_PAYLOAD(rta);
+ }
+ }
+
+ if (!dst)
+ return; /* No NDA_DST attribute */
+
+ if (ndm->ndm_family == AF_INET && ndm->ndm_ifindex != c->ifi4)
+ return;
+
+ if (ndm->ndm_family == AF_INET6 && ndm->ndm_ifindex != c->ifi6)
+ return;
+
+ if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
+ return; /* Other address families are ignored */
+
+ if (ndm->ndm_family == AF_INET && dstlen != sizeof(struct in_addr)) {
+ warn("netlink: wrong address length in AF_INET notification");
+ return;
+ }
+ if (ndm->ndm_family == AF_INET6 && dstlen != sizeof(struct in6_addr)) {
+ warn("netlink: wrong address length in AF_INET6 notification");
+ return;
+ }
+
+ /* We only handle guest-side visible addresses */
+ inany_from_af(&addr, ndm->ndm_family, dst);
+ if (!nat_inbound(c, &addr, &daddr))
+ return;
+
+ inany_ntop(&daddr, ip_str, sizeof(ip_str));
+
+ if (nh->nlmsg_type == RTM_DELNEIGH) {
+ trace("neighbour notifier delete: %s", ip_str);
+ fwd_neigh_table_free(c, &daddr);
+ return;
+ }
+ if (!(ndm->ndm_state & NUD_VALID)) {
+ trace("neighbour notifier: %s unreachable, state: 0x%04x",
+ ip_str, ndm->ndm_state);
+ fwd_neigh_table_free(c, &daddr);
+ return;
+ }
+ if (!lladdr) {
+ warn("RTM_NEWNEIGH %s: missing link layer address", ip_str);
+ return;
+ }
+ if (lladdr_len != ETH_ALEN || ndm->ndm_type != ARPHRD_ETHER)
+ return; /* Not an Ethernet entry: silently skipped */
+
+ eth_ntop(lladdr, mac_str, sizeof(mac_str));
+ trace("neighbour notifier update: %s / %s", ip_str, mac_str);
+ fwd_neigh_table_update(c, &daddr, lladdr, false);
+}
- return nl_do(s, &req, RTM_NEWLINK, 0, len);
+/**
+ * nl_neigh_sync() - Read current contents of ARP/NDP tables
+ * @c: Execution context
+ * @proto: Protocol, AF_INET or AF_INET6
+ * @ifi: Interface index
+ *
+ * Sends an RTM_GETNEIGH dump request for @proto and @ifi on the neighbour
+ * notifier socket (nl_sock_neigh) and passes each RTM_NEWNEIGH message of the
+ * reply to nl_neigh_msg_read(). A failed dump is reported with a warning only:
+ * nothing is returned to the caller.
+ */
+static void nl_neigh_sync(const struct ctx *c, int proto, int ifi)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ndmsg ndm;
+ } req = {
+ .ndm.ndm_family = proto,
+ .ndm.ndm_ifindex = ifi,
+ };
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(nl_sock_neigh, &req, RTM_GETNEIGH,
+ NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, nl_sock_neigh, buf, seq, RTM_NEWNEIGH)
+ nl_neigh_msg_read(c, nh);
+ if (status < 0)
+ warn("netlink: RTM_GETNEIGH failed: %s", strerror_(-status));
+}
+
+/**
+ * nl_neigh_notify_handler() - Non-blocking drain of pending neighbour updates
+ * @c: Execution context
+ *
+ * Calls recv() with MSG_DONTWAIT on the neighbour notifier socket until it
+ * fails, passing every message found in each received buffer to
+ * nl_neigh_msg_read(). EINTR is retried, EAGAIN ends the drain silently, and
+ * any other error is logged as a warning before returning.
+ */
+void nl_neigh_notify_handler(const struct ctx *c)
+{
+ char buf[NLBUFSIZ];
+
+ for (;;) {
+ ssize_t n = recv(nl_sock_neigh, buf, sizeof(buf), MSG_DONTWAIT);
+ struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+ if (n < 0) {
+ if (errno == EINTR)
+ continue; /* Interrupted: try again */
+ if (errno != EAGAIN)
+ warn_perror("netlink notifier read error");
+ return; /* Nothing left to read, or read error */
+ }
+ for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
+ nl_neigh_msg_read(c, nh);
+ }
+}
+
+/**
+ * nl_neigh_notify_init() - Subscribe to neighbour events
+ * @c: Execution context
+ *
+ * Creates a NETLINK_ROUTE socket (close-on-exec), binds it to the RTMGRP_NEIGH
+ * group, and registers it for EPOLLIN on c->epollfd with an epoll reference of
+ * type EPOLL_TYPE_NL_NEIGH. Once that's done, the current IPv4 and IPv6
+ * neighbour tables are read for c->ifi4 and c->ifi6 with nl_neigh_sync().
+ *
+ * If the socket already exists, a warning is printed and nothing else is done.
+ * If bind() or epoll_ctl() fail, the socket is closed and reset to -1.
+ *
+ * Return: 0 on success or if the socket already exists, -1 on failure
+ */
+int nl_neigh_notify_init(const struct ctx *c)
+{
+ union epoll_ref ref = {
+ .type = EPOLL_TYPE_NL_NEIGH
+ };
+ struct epoll_event ev = {
+ .events = EPOLLIN
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ .nl_groups = RTMGRP_NEIGH,
+ };
+
+ if (nl_sock_neigh >= 0) {
+ warn("netlink: neighbour notifier socket already exists");
+ return 0;
+ }
+
+ nl_sock_neigh = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+ NETLINK_ROUTE);
+ if (nl_sock_neigh < 0) {
+ warn_perror("Failed to create neighbour notifier socket");
+ return -1;
+ }
+
+ if (bind(nl_sock_neigh, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ warn_perror("Failed to bind neighbour notifier socket");
+ close(nl_sock_neigh);
+ nl_sock_neigh = -1;
+ return -1;
+ }
+
+ ev.data.u64 = ref.u64;
+ if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_neigh, &ev) == -1) {
+ warn_perror("epoll_ctl() on neighbour notifier socket failed");
+ close(nl_sock_neigh);
+ nl_sock_neigh = -1;
+ return -1;
+ }
+
+ nl_neigh_sync(c, AF_INET, c->ifi4); /* Initial ARP table contents */
+ nl_neigh_sync(c, AF_INET6, c->ifi6); /* Initial NDP table contents */
+
+ return 0;
}