aboutgitcodebugslistschat
path: root/netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'netlink.c')
-rw-r--r-- netlink.c 324
1 files changed, 278 insertions, 46 deletions
diff --git a/netlink.c b/netlink.c
index 9e7cccb..4aba2a3 100644
--- a/netlink.c
+++ b/netlink.c
@@ -33,8 +33,13 @@
#include "util.h"
#include "passt.h"
#include "log.h"
+#include "ip.h"
#include "netlink.h"
+/* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */
+#define RTNH_NEXT_AND_DEC(rtnh, attrlen) \
+ ((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
+
/* Netlink expects a buffer of at least 8kiB or the system page size,
* whichever is larger. 32kiB is recommended for better efficiency.
* Since the largest page size on any remotely common Linux setup is
@@ -128,7 +133,7 @@ static uint32_t nl_send(int s, void *req, uint16_t type,
n = send(s, req, len, 0);
if (n < 0)
- die("netlink: Failed to send(): %s", strerror(errno));
+ die_perror("netlink: Failed to send()");
else if (n < len)
die("netlink: Short send (%zd of %zd bytes)", n, len);
@@ -184,7 +189,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
*n = recv(s, buf, NLBUFSIZ, 0);
if (*n < 0)
- die("netlink: Failed to recv(): %s", strerror(errno));
+ die_perror("netlink: Failed to recv()");
nh = (struct nlmsghdr *)buf;
if (!NLMSG_OK(nh, *n))
@@ -254,7 +259,8 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
.rtm.rtm_type = RTN_UNICAST,
.rtm.rtm_family = af,
};
- unsigned int ifi = 0;
+ unsigned defifi = 0, anyifi = 0;
+ unsigned ndef = 0, nany = 0;
struct nlmsghdr *nh;
struct rtattr *rta;
char buf[NLBUFSIZ];
@@ -262,30 +268,80 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
uint32_t seq;
size_t na;
+ /* Look for an interface with a default route first; failing that, look
+ * for any interface with a route, and pick the first one, if any.
+ */
seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWROUTE) {
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ const void *dst = NULL;
+ unsigned thisifi = 0;
- if (ifi || rtm->rtm_dst_len || rtm->rtm_family != af)
+ if (rtm->rtm_family != af)
continue;
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
if (rta->rta_type == RTA_OIF) {
- ifi = *(unsigned int *)RTA_DATA(rta);
+ thisifi = *(unsigned int *)RTA_DATA(rta);
} else if (rta->rta_type == RTA_MULTIPATH) {
const struct rtnexthop *rtnh;
rtnh = (struct rtnexthop *)RTA_DATA(rta);
- ifi = rtnh->rtnh_ifindex;
+ thisifi = rtnh->rtnh_ifindex;
+ } else if (rta->rta_type == RTA_DST) {
+ dst = RTA_DATA(rta);
}
}
+
+ if (!thisifi)
+ continue; /* No interface for this route */
+
+ /* Skip routes to link-local addresses */
+ if (af == AF_INET && dst &&
+ IN4_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
+ continue;
+
+ if (af == AF_INET6 && dst &&
+ IN6_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
+ continue;
+
+ if (rtm->rtm_dst_len == 0) {
+ /* Default route */
+ ndef++;
+ if (!defifi)
+ defifi = thisifi;
+ } else {
+ /* Non-default route */
+ nany++;
+ if (!anyifi)
+ anyifi = thisifi;
+ }
}
if (status < 0)
warn("netlink: RTM_GETROUTE failed: %s", strerror(-status));
- return ifi;
+ if (defifi) {
+ if (ndef > 1) {
+ info("Multiple default %s routes, picked first",
+ af_name(af));
+ }
+ return defifi;
+ }
+
+ if (anyifi) {
+ if (nany > 1) {
+ info("Multiple interfaces with %s routes, picked first",
+ af_name(af));
+ }
+ return anyifi;
+ }
+
+ if (!nany)
+ info("No interfaces with usable %s routes", af_name(af));
+
+ return 0;
}
/**
@@ -297,12 +353,13 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
*/
bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
{
+ int nh_len = RTA_PAYLOAD(rta);
struct rtnexthop *rtnh;
bool found = false;
int hops = -1;
for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
- RTNH_OK(rtnh, RTA_PAYLOAD(rta)); rtnh = RTNH_NEXT(rtnh)) {
+ RTNH_OK(rtnh, nh_len); rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) {
size_t len = rtnh->rtnh_len - sizeof(*rtnh);
struct rtattr *rta_inner;
@@ -332,7 +389,7 @@ bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
* @af: Address family
* @gw: Default gateway to fill on NL_GET
*
- * Return: 0 on success, negative error code on failure
+ * Return: error on netlink failure, or 0 (gw unset if default route not found)
*/
int nl_route_get_def(int s, unsigned int ifi, sa_family_t af, void *gw)
{
@@ -479,7 +536,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
.ifi = ifi_src,
};
- ssize_t nlmsgs_size, status;
+ ssize_t nlmsgs_size, left, status;
unsigned dup_routes = 0;
struct nlmsghdr *nh;
char buf[NLBUFSIZ];
@@ -493,39 +550,83 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
* routes in the buffer at once.
*/
nh = nl_next(s_src, buf, NULL, &nlmsgs_size);
- for (status = nlmsgs_size;
- NLMSG_OK(nh, status) && (status = nl_status(nh, status, seq)) > 0;
- nh = NLMSG_NEXT(nh, status)) {
+ for (left = nlmsgs_size;
+ NLMSG_OK(nh, left) && (status = nl_status(nh, left, seq)) > 0;
+ nh = NLMSG_NEXT(nh, left)) {
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ bool discard = false;
struct rtattr *rta;
size_t na;
if (nh->nlmsg_type != RTM_NEWROUTE)
continue;
- dup_routes++;
-
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
+ /* RTA_OIF and RTA_MULTIPATH attributes carry the
+ * identifier of a host interface. If they match the
+ * host interface we're copying from, change them to
+ * match the corresponding identifier in the target
+ * namespace.
+ *
+ * If RTA_OIF doesn't match (NETLINK_GET_STRICT_CHK not
+ * available), or if any interface index in nexthop
+ * objects differs from the host interface, discard the
+ * route altogether.
+ */
if (rta->rta_type == RTA_OIF) {
- /* The host obviously list's the host interface
- * id here, we need to change it to the
- * namespace's interface id
- */
+ if (*(unsigned int *)RTA_DATA(rta) != ifi_src) {
+ discard = true;
+ break;
+ }
+
*(unsigned int *)RTA_DATA(rta) = ifi_dst;
- } else if (rta->rta_type == RTA_PREFSRC) {
- /* Host routes might include a preferred source
- * address, which must be one of the host's
- * addresses. However, with -a pasta will use a
- * different namespace address, making such a
- * route invalid in the namespace. Strip off
- * RTA_PREFSRC attributes to avoid that. */
+ } else if (rta->rta_type == RTA_MULTIPATH) {
+ int nh_len = RTA_PAYLOAD(rta);
+ struct rtnexthop *rtnh;
+
+ for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
+ RTNH_OK(rtnh, nh_len);
+ rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) {
+ int src = (int)ifi_src;
+
+ if (rtnh->rtnh_ifindex != src) {
+ discard = true;
+ break;
+ }
+
+ rtnh->rtnh_ifindex = ifi_dst;
+ }
+
+ if (discard)
+ break;
+ } else if (rta->rta_type == RTA_PREFSRC ||
+ rta->rta_type == RTA_NH_ID) {
+ /* Strip RTA_PREFSRC attributes: host routes
+ * might include a preferred source address,
+ * which must be one of the host's addresses.
+ * However, with -a, pasta will use a different
+ * namespace address, making such a route
+ * invalid in the namespace.
+ *
+ * Strip RTA_NH_ID attributes: host routes set
+ * up via routing protocols (e.g. OSPF) might
+ * contain a nexthop ID (and not nexthop
+ * objects, which are taken care of in the
+ * RTA_MULTIPATH case above) that's not valid
+ * in the target namespace.
+ */
rta->rta_type = RTA_UNSPEC;
}
}
+
+ if (discard)
+ nh->nlmsg_type = NLMSG_NOOP;
+ else
+ dup_routes++;
}
- if (!NLMSG_OK(nh, status) || status > 0) {
+ if (!NLMSG_OK(nh, left)) {
/* Process any remaining datagrams in a different
* buffer so we don't overwrite the first one.
*/
@@ -551,9 +652,9 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
* to calculate dependencies: let the kernel do that.
*/
for (i = 0; i < dup_routes; i++) {
- for (nh = (struct nlmsghdr *)buf, status = nlmsgs_size;
- NLMSG_OK(nh, status);
- nh = NLMSG_NEXT(nh, status)) {
+ for (nh = (struct nlmsghdr *)buf, left = nlmsgs_size;
+ NLMSG_OK(nh, left);
+ nh = NLMSG_NEXT(nh, left)) {
uint16_t flags = nh->nlmsg_flags;
int rc;
@@ -563,7 +664,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
rc = nl_do(s_dst, nh, RTM_NEWROUTE,
(flags & ~NLM_F_DUMP_FILTERED) | NLM_F_CREATE,
nh->nlmsg_len);
- if (rc < 0 && rc != -ENETUNREACH && rc != -EEXIST)
+ if (rc < 0 && rc != -EEXIST &&
+ rc != -ENETUNREACH && rc != -EHOSTUNREACH)
return rc;
}
}
@@ -572,6 +674,63 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
}
/**
+ * nl_addr_set_ll_nodad() - Set IFA_F_NODAD on IPv6 link-local addresses
+ * @s: Netlink socket
+ * @ifi: Interface index in target namespace
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_set_ll_nodad(int s, unsigned int ifi)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ uint32_t seq, last_seq = 0;
+ ssize_t status, ret = 0;
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK)
+ continue;
+
+ ifa->ifa_flags |= IFA_F_NODAD;
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
+ }
+
+ last_seq = nl_send(s, nh, RTM_NEWADDR, NLM_F_REPLACE,
+ nh->nlmsg_len);
+ }
+
+ if (status < 0)
+ ret = status;
+
+ for (seq = seq + 1; seq <= last_seq; seq++) {
+ nl_foreach(nh, status, s, buf, seq)
+ warn("netlink: Unexpected response message");
+
+ if (!ret && status < 0)
+ ret = status;
+ }
+
+ return ret;
+}
+
+/**
* nl_addr_get() - Get most specific global address, given interface and family
* @s: Netlink socket
* @ifi: Interface index in outer network namespace
@@ -580,7 +739,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
* @prefix_len: Mask or prefix length, to fill (for IPv4)
* @addr_l: Link-scoped address to fill (for IPv6)
*
- * Return: 9 on success, negative error code on failure
+ * Return: 0 on success, negative error code on failure
*/
int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
void *addr, int *prefix_len, void *addr_l)
@@ -604,12 +763,13 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
struct rtattr *rta;
size_t na;
- if (ifa->ifa_index != ifi)
+ if (ifa->ifa_index != ifi || ifa->ifa_flags & IFA_F_DEPRECATED)
continue;
for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFA_ADDRESS)
+ if ((af == AF_INET && rta->rta_type != IFA_LOCAL) ||
+ (af == AF_INET6 && rta->rta_type != IFA_ADDRESS))
continue;
if (af == AF_INET && ifa->ifa_prefixlen > prefix_max) {
@@ -637,7 +797,54 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
}
/**
- * nl_add_set() - Set IP addresses for given interface and address family
+ * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
+ * @s: Netlink socket
+ * @ifi: Interface index in outer network namespace
+ * @addr: Link-local address to fill
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ struct nlmsghdr *nh;
+ bool found = false;
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK ||
+ found)
+ continue;
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != IFA_ADDRESS)
+ continue;
+
+ if (!found) {
+ memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ found = true;
+ }
+ }
+ }
+ return status;
+}
+
+/**
+ * nl_addr_set() - Set IP addresses for given interface and address family
* @s: Netlink socket
* @ifi: Interface index
* @af: Address family
@@ -740,10 +947,13 @@ int nl_addr_dup(int s_src, unsigned int ifi_src,
ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
if (rc < 0 || ifa->ifa_scope == RT_SCOPE_LINK ||
- ifa->ifa_index != ifi_src)
+ ifa->ifa_index != ifi_src ||
+ ifa->ifa_flags & IFA_F_DEPRECATED)
continue;
ifa->ifa_index = ifi_dst;
+ /* Same as nl_addr_set(), but here it's more than a default */
+ ifa->ifa_flags |= IFA_F_NODAD;
for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
@@ -751,6 +961,10 @@ int nl_addr_dup(int s_src, unsigned int ifi_src,
if (rta->rta_type == IFA_LABEL ||
rta->rta_type == IFA_CACHEINFO)
rta->rta_type = IFA_UNSPEC;
+
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
}
rc = nl_do(s_dst, nh, RTM_NEWADDR,
@@ -832,14 +1046,14 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac)
}
/**
- * nl_link_up() - Bring link up
+ * nl_link_set_mtu() - Set link MTU
* @s: Netlink socket
* @ifi: Interface index
- * @mtu: If non-zero, set interface MTU
+ * @mtu: Interface MTU
*
* Return: 0 on success, negative error code on failure
*/
-int nl_link_up(int s, unsigned int ifi, int mtu)
+int nl_link_set_mtu(int s, unsigned int ifi, int mtu)
{
struct req_t {
struct nlmsghdr nlh;
@@ -849,17 +1063,35 @@ int nl_link_up(int s, unsigned int ifi, int mtu)
} req = {
.ifm.ifi_family = AF_UNSPEC,
.ifm.ifi_index = ifi,
- .ifm.ifi_flags = IFF_UP,
- .ifm.ifi_change = IFF_UP,
.rta.rta_type = IFLA_MTU,
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
.mtu = mtu,
};
- ssize_t len = sizeof(req);
- if (!mtu)
- /* Shorten request to drop MTU attribute */
- len = offsetof(struct req_t, rta);
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_link_set_flags() - Set link flags
+ * @s: Netlink socket
+ * @ifi: Interface index
+ * @set: Device flags to set
+ * @change: Mask of device flag changes
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_link_set_flags(int s, unsigned int ifi,
+ unsigned int set, unsigned int change)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .ifm.ifi_family = AF_UNSPEC,
+ .ifm.ifi_index = ifi,
+ .ifm.ifi_flags = set,
+ .ifm.ifi_change = change,
+ };
- return nl_do(s, &req, RTM_NEWLINK, 0, len);
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
}