about git code bugs lists chat
path: root/netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'netlink.c')
-rw-r--r-- netlink.c 378
1 files changed, 356 insertions, 22 deletions
diff --git a/netlink.c b/netlink.c
index 093de26..82a2f0c 100644
--- a/netlink.c
+++ b/netlink.c
@@ -26,6 +26,7 @@
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
+#include <net/if_arp.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
@@ -35,11 +36,16 @@
#include "log.h"
#include "ip.h"
#include "netlink.h"
+#include "epoll_ctl.h"
/* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */
#define RTNH_NEXT_AND_DEC(rtnh, attrlen) \
((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
+/* Convenience macro borrowed from kernel */
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE)
+
/* Netlink expects a buffer of at least 8kiB or the system page size,
* whichever is larger. 32kiB is recommended for better efficiency.
* Since the largest page size on any remotely common Linux setup is
@@ -50,9 +56,10 @@
#define NLBUFSIZ 65536
/* Socket in init, in target namespace, sequence (just needs to be monotonic) */
-int nl_sock = -1;
-int nl_sock_ns = -1;
-static int nl_seq = 1;
+int nl_sock = -1;
+int nl_sock_ns = -1;
+static int nl_sock_neigh = -1;
+static int nl_seq = 1;
/**
* nl_sock_init_do() - Set up netlink sockets in init or target namespace
@@ -199,7 +206,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
}
/**
- * nl_foreach - 'for' type macro to step through netlink response messages
+ * nl_foreach() - 'for' type macro to step through netlink response messages
* nl_foreach_oftype - as above, but only messages of expected type
* @nh: Steps through each response header (struct nlmsghdr *)
* @status: When loop exits indicates if there was an error (ssize_t)
@@ -297,6 +304,10 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
if (!thisifi)
continue; /* No interface for this route */
+ /* Skip 'lo': we should test IFF_LOOPBACK, but keep it simple */
+ if (thisifi == 1)
+ continue;
+
/* Skip routes to link-local addresses */
if (af == AF_INET && dst &&
IN4_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
@@ -320,7 +331,7 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
}
if (status < 0)
- warn("netlink: RTM_GETROUTE failed: %s", strerror(-status));
+ warn("netlink: RTM_GETROUTE failed: %s", strerror_(-status));
if (defifi) {
if (ndef > 1) {
@@ -351,9 +362,9 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
*
* Return: true if a gateway was found, false otherwise
*/
-bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
+static bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
{
- size_t nh_len = RTA_PAYLOAD(rta);
+ int nh_len = RTA_PAYLOAD(rta);
struct rtnexthop *rtnh;
bool found = false;
int hops = -1;
@@ -561,6 +572,11 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
if (nh->nlmsg_type != RTM_NEWROUTE)
continue;
+ /* nexthop state flags don't apply to freshly created routes,
+ * and the kernel will refuse our route if they are set.
+ */
+ rtm->rtm_flags &= ~RTNH_COMPARE_MASK;
+
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
/* RTA_OIF and RTA_MULTIPATH attributes carry the
@@ -582,7 +598,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
*(unsigned int *)RTA_DATA(rta) = ifi_dst;
} else if (rta->rta_type == RTA_MULTIPATH) {
- size_t nh_len = RTA_PAYLOAD(rta);
+ int nh_len = RTA_PAYLOAD(rta);
struct rtnexthop *rtnh;
for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
@@ -674,6 +690,63 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
}
/**
+ * nl_addr_set_ll_nodad() - Set IFA_F_NODAD on IPv6 link-local addresses
+ * @s: Netlink socket
+ * @ifi: Interface index in target namespace
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_set_ll_nodad(int s, unsigned int ifi)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ uint32_t seq, last_seq = 0; /* last_seq stays 0 when nothing is re-submitted */
+ ssize_t status, ret = 0;
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req)); /* dump IPv6 addresses */
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK) /* only link-scoped addresses on @ifi */
+ continue;
+
+ ifa->ifa_flags |= IFA_F_NODAD; /* flag in the fixed ifaddrmsg header */
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
+ }
+
+ last_seq = nl_send(s, nh, RTM_NEWADDR, NLM_F_REPLACE, /* re-submit the modified dump entry itself */
+ nh->nlmsg_len);
+ }
+
+ if (status < 0) /* error reported while reading the dump */
+ ret = status;
+
+ for (seq = seq + 1; seq <= last_seq; seq++) { /* drain replies to the re-submitted requests; assumes nl_send() hands out consecutive sequence numbers -- TODO confirm */
+ nl_foreach(nh, status, s, buf, seq)
+ warn("netlink: Unexpected response message");
+
+ if (!ret && status < 0) /* keep the first error seen */
+ ret = status;
+ }
+
+ return ret;
+}
+
+/**
* nl_addr_get() - Get most specific global address, given interface and family
* @s: Netlink socket
* @ifi: Interface index in outer network namespace
@@ -682,7 +755,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
* @prefix_len: Mask or prefix length, to fill (for IPv4)
* @addr_l: Link-scoped address to fill (for IPv6)
*
- * Return: 9 on success, negative error code on failure
+ * Return: 0 on success, negative error code on failure
*/
int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
void *addr, int *prefix_len, void *addr_l)
@@ -720,7 +793,7 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
prefix_max = *prefix_len = ifa->ifa_prefixlen;
} else if (af == AF_INET6 && addr &&
- ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
+ ifa->ifa_scope < RT_SCOPE_LINK &&
ifa->ifa_prefixlen > prefix_max) {
memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
@@ -740,7 +813,54 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
}
/**
- * nl_add_set() - Set IP addresses for given interface and address family
+ * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
+ * @s: Netlink socket
+ * @ifi: Interface index in outer network namespace
+ * @addr: Link-local address to fill
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ struct nlmsghdr *nh;
+ bool found = false; /* set once @addr has been filled */
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req)); /* dump IPv6 addresses */
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK ||
+ found) /* after the first hit, remaining entries are read but ignored */
+ continue;
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != IFA_ADDRESS)
+ continue;
+
+ if (!found) {
+ memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta)); /* NOTE(review): copies RTA_PAYLOAD() bytes; assumes that equals sizeof(*addr) */
+ found = true;
+ }
+ }
+ }
+ return status; /* NOTE(review): dump status only; 'found' is not reflected, so @addr may be left unset on a 0 return */
+}
+
+/**
+ * nl_addr_set() - Set IP addresses for given interface and address family
* @s: Netlink socket
* @ifi: Interface index
* @af: Address family
@@ -916,7 +1036,6 @@ int nl_link_get_mac(int s, unsigned int ifi, void *mac)
/**
* nl_link_set_mac() - Set link MAC address
* @s: Netlink socket
- * @ns: Use netlink socket in namespace
* @ifi: Interface index
* @mac: MAC address to set
*
@@ -942,14 +1061,14 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac)
}
/**
- * nl_link_up() - Bring link up
+ * nl_link_set_mtu() - Set link MTU
* @s: Netlink socket
* @ifi: Interface index
- * @mtu: If non-zero, set interface MTU
+ * @mtu: Interface MTU
*
* Return: 0 on success, negative error code on failure
*/
-int nl_link_up(int s, unsigned int ifi, int mtu)
+int nl_link_set_mtu(int s, unsigned int ifi, int mtu)
{
struct req_t {
struct nlmsghdr nlh;
@@ -959,17 +1078,232 @@ int nl_link_up(int s, unsigned int ifi, int mtu)
} req = {
.ifm.ifi_family = AF_UNSPEC,
.ifm.ifi_index = ifi,
- .ifm.ifi_flags = IFF_UP,
- .ifm.ifi_change = IFF_UP,
.rta.rta_type = IFLA_MTU,
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
.mtu = mtu,
};
- ssize_t len = sizeof(req);
- if (!mtu)
- /* Shorten request to drop MTU attribute */
- len = offsetof(struct req_t, rta);
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_link_set_flags() - Set link flags
+ * @s: Netlink socket
+ * @ifi: Interface index
+ * @set: Device flags to set
+ * @change: Mask of device flag changes
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_link_set_flags(int s, unsigned int ifi,
+ unsigned int set, unsigned int change)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = { /* header-only request: no attributes follow the ifinfomsg */
+ .ifm.ifi_family = AF_UNSPEC,
+ .ifm.ifi_index = ifi,
+ .ifm.ifi_flags = set, /* new values for the flags */
+ .ifm.ifi_change = change, /* mask passed alongside @set */
+ };
+
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req)); /* result of nl_do() is returned unchanged */
+}
+
+/**
+ * nl_neigh_msg_read() - Interpret a neighbour state message from netlink
+ * @c: Execution context
+ * @nh: Message to be read
+ */
+static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nh);
+ struct rtattr *rta = (struct rtattr *)(ndm + 1); /* attributes start right after the ndmsg header */
+ size_t na = NLMSG_PAYLOAD(nh, sizeof(*ndm));
+ char ip_str[INET6_ADDRSTRLEN];
+ char mac_str[ETH_ADDRSTRLEN];
+ const uint8_t *lladdr = NULL;
+ union inany_addr addr, daddr;
+ const void *dst = NULL;
+ size_t lladdr_len = 0;
+ size_t dstlen = 0;
+
+ if (nh->nlmsg_type == NLMSG_DONE) /* end-of-dump marker: nothing to do */
+ return;
+
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *errmsg = (struct nlmsgerr *)ndm; /* payload is reinterpreted as an error record */
+
+ warn("netlink error message on neighbour notifier: %s",
+ strerror_(-errmsg->error));
+ return;
+ }
+
+ if (nh->nlmsg_type != RTM_NEWNEIGH && nh->nlmsg_type != RTM_DELNEIGH) /* any other message type is ignored */
+ return;
+
+ for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { /* pick up destination and link-layer address attributes */
+ if (rta->rta_type == NDA_DST) {
+ dst = RTA_DATA(rta);
+ dstlen = RTA_PAYLOAD(rta);
+ } else if (rta->rta_type == NDA_LLADDR) {
+ lladdr = RTA_DATA(rta);
+ lladdr_len = RTA_PAYLOAD(rta);
+ }
+ }
+
+ if (!dst) /* no NDA_DST attribute: nothing to key on */
+ return;
+
+ if (ndm->ndm_family == AF_INET && ndm->ndm_ifindex != c->ifi4) /* IPv4 entry on an interface other than c->ifi4 */
+ return;
+
+ if (ndm->ndm_family == AF_INET6 && ndm->ndm_ifindex != c->ifi6) /* IPv6 entry on an interface other than c->ifi6 */
+ return;
+
+ if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6) /* only IPv4 and IPv6 entries are handled */
+ return;
+
+ if (ndm->ndm_family == AF_INET && dstlen != sizeof(struct in_addr)) {
+ warn("netlink: wrong address length in AF_INET notification");
+ return;
+ }
+ if (ndm->ndm_family == AF_INET6 && dstlen != sizeof(struct in6_addr)) {
+ warn("netlink: wrong address length in AF_INET6 notification");
+ return;
+ }
- return nl_do(s, &req, RTM_NEWLINK, 0, len);
+ /* We only handle guest-side visible addresses */
+ inany_from_af(&addr, ndm->ndm_family, dst);
+ if (!nat_inbound(c, &addr, &daddr)) /* presumably false means no guest-visible mapping -- verify against nat_inbound() */
+ return;
+
+ inany_ntop(&daddr, ip_str, sizeof(ip_str)); /* printable form, used only for logging below */
+
+ if (nh->nlmsg_type == RTM_DELNEIGH) {
+ trace("neighbour notifier delete: %s", ip_str);
+ fwd_neigh_table_free(c, &daddr);
+ return;
+ }
+ if (!(ndm->ndm_state & NUD_VALID)) { /* RTM_NEWNEIGH in a state outside NUD_VALID is handled like a deletion */
+ trace("neighbour notifier: %s unreachable, state: 0x%04x",
+ ip_str, ndm->ndm_state);
+ fwd_neigh_table_free(c, &daddr);
+ return;
+ }
+ if (!lladdr) {
+ warn("RTM_NEWNEIGH %s: missing link layer address", ip_str);
+ return;
+ }
+ if (lladdr_len != ETH_ALEN || ndm->ndm_type != ARPHRD_ETHER) /* non-Ethernet entries are dropped without a message */
+ return;
+
+ eth_ntop(lladdr, mac_str, sizeof(mac_str));
+ trace("neighbour notifier update: %s / %s", ip_str, mac_str);
+ fwd_neigh_table_update(c, &daddr, lladdr, false); /* NOTE(review): meaning of the final 'false' argument is not visible here */
+}
+
+/**
+ * nl_neigh_sync() - Read current contents of ARP/NDP tables
+ * @c: Execution context
+ * @proto: Protocol, AF_INET or AF_INET6
+ * @ifi: Interface index
+ */
+static void nl_neigh_sync(const struct ctx *c, int proto, int ifi)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ndmsg ndm;
+ } req = {
+ .ndm.ndm_family = proto,
+ .ndm.ndm_ifindex = ifi,
+ };
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(nl_sock_neigh, &req, RTM_GETNEIGH, /* dump request goes out on the notifier socket itself */
+ NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, nl_sock_neigh, buf, seq, RTM_NEWNEIGH)
+ nl_neigh_msg_read(c, nh); /* each dumped entry takes the same path as a live notification */
+ if (status < 0) /* failure is only logged: nothing is returned to the caller */
+ warn("netlink: RTM_GETNEIGH failed: %s", strerror_(-status));
+}
+
+/**
+ * nl_neigh_notify_handler() - Non-blocking drain of pending neighbour updates
+ * @c: Execution context
+ */
+void nl_neigh_notify_handler(const struct ctx *c)
+{
+ char buf[NLBUFSIZ];
+
+ for (;;) { /* keep reading until recv() reports an error, EAGAIN included */
+ ssize_t n = recv(nl_sock_neigh, buf, sizeof(buf), MSG_DONTWAIT);
+ struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+ if (n < 0) { /* NOTE(review): n == 0 is not an exit condition; the loop just calls recv() again */
+ if (errno == EINTR) /* interrupted: retry the read */
+ continue;
+ if (errno != EAGAIN) /* EAGAIN is the quiet exit; anything else is logged */
+ warn_perror("netlink notifier read error");
+ return;
+ }
+ for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) /* one read may carry several messages */
+ nl_neigh_msg_read(c, nh);
+ }
+}
+
+/**
+ * nl_neigh_notify_init() - Subscribe to neighbour events
+ * @c: Execution context
+ *
+ * Return: 0 on success, -1 on failure
+ */
+int nl_neigh_notify_init(const struct ctx *c)
+{
+ union epoll_ref ref = {
+ .type = EPOLL_TYPE_NL_NEIGH
+ };
+ struct epoll_event ev = {
+ .events = EPOLLIN
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ .nl_groups = RTMGRP_NEIGH, /* multicast group to join when binding */
+ };
+
+ if (nl_sock_neigh >= 0) { /* repeated call: warn, but report success */
+ warn("netlink: neighbour notifier socket already exists");
+ return 0;
+ }
+
+ nl_sock_neigh = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+ NETLINK_ROUTE);
+ if (nl_sock_neigh < 0) {
+ warn_perror("Failed to create neighbour notifier socket");
+ return -1;
+ }
+
+ if (bind(nl_sock_neigh, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ warn_perror("Failed to bind neighbour notifier socket");
+ close(nl_sock_neigh); /* undo: close and mark the socket as absent again */
+ nl_sock_neigh = -1;
+ return -1;
+ }
+
+ ev.data.u64 = ref.u64; /* epoll reference (type EPOLL_TYPE_NL_NEIGH) is stored as the event data */
+ if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_neigh, &ev) == -1) {
+ warn_perror("epoll_ctl() on neighbour notifier socket failed");
+ close(nl_sock_neigh); /* undo: close and mark the socket as absent again */
+ nl_sock_neigh = -1;
+ return -1;
+ }
+
+ nl_neigh_sync(c, AF_INET, c->ifi4); /* initial dump for IPv4, after the subscription is in place */
+ nl_neigh_sync(c, AF_INET6, c->ifi6); /* initial dump for IPv6 */
+
+ return 0;
}