about | git | code | bugs | lists | chat
path: root/netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'netlink.c')
-rw-r--r-- netlink.c 286
1 files changed, 241 insertions, 45 deletions
diff --git a/netlink.c b/netlink.c
index 9b3dba2..ee9325a 100644
--- a/netlink.c
+++ b/netlink.c
@@ -33,8 +33,13 @@
#include "util.h"
#include "passt.h"
#include "log.h"
+#include "ip.h"
#include "netlink.h"
+/* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen'
+ *
+ * Subtract the aligned length of the current nexthop from 'attrlen', then
+ * evaluate to the nexthop following 'rtnh'. 'attrlen' is modified in place, so
+ * it must be an lvalue, and 'rtnh' is evaluated more than once, so it must not
+ * have side effects.
+ */
+#define RTNH_NEXT_AND_DEC(rtnh, attrlen) \
+ ((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
+
/* Netlink expects a buffer of at least 8kiB or the system page size,
* whichever is larger. 32kiB is recommended for more efficient operation.
* Since the largest page size on any remotely common Linux setup is
@@ -128,7 +133,7 @@ static uint32_t nl_send(int s, void *req, uint16_t type,
n = send(s, req, len, 0);
if (n < 0)
- die("netlink: Failed to send(): %s", strerror(errno));
+ die_perror("netlink: Failed to send()");
else if (n < len)
die("netlink: Short send (%zd of %zd bytes)", n, len);
@@ -184,7 +189,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
*n = recv(s, buf, NLBUFSIZ, 0);
if (*n < 0)
- die("netlink: Failed to recv(): %s", strerror(errno));
+ die_perror("netlink: Failed to recv()");
nh = (struct nlmsghdr *)buf;
if (!NLMSG_OK(nh, *n))
@@ -264,12 +269,12 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
size_t na;
/* Look for an interface with a default route first, failing that, look
- * for any interface with a route, and pick it only if it's the only
- * interface with a route.
+ * for any interface with a route, and pick the first one, if any.
*/
seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWROUTE) {
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ const void *dst = NULL;
unsigned thisifi = 0;
if (rtm->rtm_family != af)
@@ -284,12 +289,27 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
rtnh = (struct rtnexthop *)RTA_DATA(rta);
thisifi = rtnh->rtnh_ifindex;
+ } else if (rta->rta_type == RTA_DST) {
+ dst = RTA_DATA(rta);
}
}
if (!thisifi)
continue; /* No interface for this route */
+ /* Skip 'lo': we should test IFF_LOOPBACK, but keep it simple */
+ if (thisifi == 1)
+ continue;
+
+ /* Skip routes to link-local addresses */
+ if (af == AF_INET && dst &&
+ IN4_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
+ continue;
+
+ if (af == AF_INET6 && dst &&
+ IN6_IS_PREFIX_LINKLOCAL(dst, rtm->rtm_dst_len))
+ continue;
+
if (rtm->rtm_dst_len == 0) {
/* Default route */
ndef++;
@@ -304,25 +324,26 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
}
if (status < 0)
- warn("netlink: RTM_GETROUTE failed: %s", strerror(-status));
+ warn("netlink: RTM_GETROUTE failed: %s", strerror_(-status));
if (defifi) {
- if (ndef > 1)
+ if (ndef > 1) {
info("Multiple default %s routes, picked first",
- af == AF_INET ? "IPv4" : "IPv6");
+ af_name(af));
+ }
return defifi;
}
if (anyifi) {
- if (nany == 1)
- return anyifi;
-
- info("Multiple interfaces with %s routes, use -i to select one",
- af == AF_INET ? "IPv4" : "IPv6");
+ if (nany > 1) {
+ info("Multiple interfaces with %s routes, picked first",
+ af_name(af));
+ }
+ return anyifi;
}
if (!nany)
- info("No interfaces with %s routes", af == AF_INET ? "IPv4" : "IPv6");
+ info("No interfaces with usable %s routes", af_name(af));
return 0;
}
@@ -334,14 +355,15 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
*
* Return: true if a gateway was found, false otherwise
*/
-bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
+static bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
{
+ int nh_len = RTA_PAYLOAD(rta);
struct rtnexthop *rtnh;
bool found = false;
int hops = -1;
for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
- RTNH_OK(rtnh, RTA_PAYLOAD(rta)); rtnh = RTNH_NEXT(rtnh)) {
+ RTNH_OK(rtnh, nh_len); rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) {
size_t len = rtnh->rtnh_len - sizeof(*rtnh);
struct rtattr *rta_inner;
@@ -536,32 +558,76 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
NLMSG_OK(nh, left) && (status = nl_status(nh, left, seq)) > 0;
nh = NLMSG_NEXT(nh, left)) {
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ bool discard = false;
struct rtattr *rta;
size_t na;
if (nh->nlmsg_type != RTM_NEWROUTE)
continue;
- dup_routes++;
-
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
+ /* RTA_OIF and RTA_MULTIPATH attributes carry the
+ * identifier of a host interface. If they match the
+ * host interface we're copying from, change them to
+ * match the corresponding identifier in the target
+ * namespace.
+ *
+ * If RTA_OIF doesn't match (NETLINK_GET_STRICT_CHK not
+ * available), or if any interface index in nexthop
+ * objects differ from the host interface, discard the
+ * route altogether.
+ */
if (rta->rta_type == RTA_OIF) {
- /* The host obviously list's the host interface
- * id here, we need to change it to the
- * namespace's interface id
- */
+ if (*(unsigned int *)RTA_DATA(rta) != ifi_src) {
+ discard = true;
+ break;
+ }
+
*(unsigned int *)RTA_DATA(rta) = ifi_dst;
- } else if (rta->rta_type == RTA_PREFSRC) {
- /* Host routes might include a preferred source
- * address, which must be one of the host's
- * addresses. However, with -a pasta will use a
- * different namespace address, making such a
- * route invalid in the namespace. Strip off
- * RTA_PREFSRC attributes to avoid that. */
+ } else if (rta->rta_type == RTA_MULTIPATH) {
+ int nh_len = RTA_PAYLOAD(rta);
+ struct rtnexthop *rtnh;
+
+ for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
+ RTNH_OK(rtnh, nh_len);
+ rtnh = RTNH_NEXT_AND_DEC(rtnh, nh_len)) {
+ int src = (int)ifi_src;
+
+ if (rtnh->rtnh_ifindex != src) {
+ discard = true;
+ break;
+ }
+
+ rtnh->rtnh_ifindex = ifi_dst;
+ }
+
+ if (discard)
+ break;
+ } else if (rta->rta_type == RTA_PREFSRC ||
+ rta->rta_type == RTA_NH_ID) {
+ /* Strip RTA_PREFSRC attributes: host routes
+ * might include a preferred source address,
+ * which must be one of the host's addresses.
+ * However, with -a, pasta will use a different
+ * namespace address, making such a route
+ * invalid in the namespace.
+ *
+ * Strip RTA_NH_ID attributes: host routes set
+ * up via routing protocols (e.g. OSPF) might
+ * contain a nexthop ID (and not nexthop
+ * objects, which are taken care of in the
+ * RTA_MULTIPATH case above) that's not valid
+ * in the target namespace.
+ */
rta->rta_type = RTA_UNSPEC;
}
}
+
+ if (discard)
+ nh->nlmsg_type = NLMSG_NOOP;
+ else
+ dup_routes++;
}
if (!NLMSG_OK(nh, left)) {
@@ -602,7 +668,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
rc = nl_do(s_dst, nh, RTM_NEWROUTE,
(flags & ~NLM_F_DUMP_FILTERED) | NLM_F_CREATE,
nh->nlmsg_len);
- if (rc < 0 && rc != -ENETUNREACH && rc != -EEXIST)
+ if (rc < 0 && rc != -EEXIST &&
+ rc != -ENETUNREACH && rc != -EHOSTUNREACH)
return rc;
}
}
@@ -611,6 +678,63 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
}
/**
+ * nl_addr_set_ll_nodad() - Set IFA_F_NODAD on IPv6 link-local addresses
+ * @s: Netlink socket
+ * @ifi: Interface index in target namespace
+ *
+ * Dump addresses with an AF_INET6 RTM_GETADDR request and, for every address
+ * on @ifi with link scope (RT_SCOPE_LINK), send the same message back as
+ * RTM_NEWADDR with NLM_F_REPLACE, after adding IFA_F_NODAD to the flags in
+ * the address header and to the IFA_FLAGS attribute, if present.
+ *
+ * The replacement requests are sent while the dump is still being read:
+ * their responses are only collected once the dump is done, by walking the
+ * sequence numbers from the one following the dump request up to the one of
+ * the last request sent. Any message received for those is reported as
+ * unexpected.
+ *
+ * NOTE(review): this presumably relies on nl_send() handing out consecutive
+ * sequence numbers on this socket -- confirm against nl_send().
+ *
+ * Return: 0 on success, negative error code on failure: the status of the
+ * dump if it failed, otherwise the first failure reported for a
+ * replacement request
+ */
+int nl_addr_set_ll_nodad(int s, unsigned int ifi)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ uint32_t seq, last_seq = 0;
+ ssize_t status, ret = 0;
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK)
+ continue;
+
+ ifa->ifa_flags |= IFA_F_NODAD;
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
+ }
+
+ last_seq = nl_send(s, nh, RTM_NEWADDR, NLM_F_REPLACE,
+ nh->nlmsg_len);
+ }
+
+ if (status < 0)
+ ret = status;
+
+ for (seq = seq + 1; seq <= last_seq; seq++) {
+ nl_foreach(nh, status, s, buf, seq)
+ warn("netlink: Unexpected response message");
+
+ if (!ret && status < 0)
+ ret = status;
+ }
+
+ return ret;
+}
+
+/**
* nl_addr_get() - Get most specific global address, given interface and family
* @s: Netlink socket
* @ifi: Interface index in outer network namespace
@@ -619,7 +743,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
* @prefix_len: Mask or prefix length, to fill (for IPv4)
* @addr_l: Link-scoped address to fill (for IPv6)
*
- * Return: 9 on success, negative error code on failure
+ * Return: 0 on success, negative error code on failure
*/
int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
void *addr, int *prefix_len, void *addr_l)
@@ -643,12 +767,13 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
struct rtattr *rta;
size_t na;
- if (ifa->ifa_index != ifi)
+ if (ifa->ifa_index != ifi || ifa->ifa_flags & IFA_F_DEPRECATED)
continue;
for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFA_ADDRESS)
+ if ((af == AF_INET && rta->rta_type != IFA_LOCAL) ||
+ (af == AF_INET6 && rta->rta_type != IFA_ADDRESS))
continue;
if (af == AF_INET && ifa->ifa_prefixlen > prefix_max) {
@@ -676,7 +801,54 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
}
/**
- * nl_add_set() - Set IP addresses for given interface and address family
+ * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
+ * @s: Netlink socket
+ * @ifi: Interface index in outer network namespace
+ * @addr: Link-local address to fill
+ *
+ * Dump addresses with an AF_INET6 RTM_GETADDR request and copy to @addr the
+ * first IFA_ADDRESS attribute found on an address of @ifi with link scope
+ * (RT_SCOPE_LINK). Once an address is found, the remaining messages of the
+ * dump are still read, but ignored.
+ *
+ * NOTE(review): the return value is the status left by the dump loop and
+ * doesn't depend on whether an address was found: if nothing matches, @addr
+ * is left untouched -- confirm that callers initialise it beforehand.
+ *
+ * NOTE(review): the copy length is the attribute payload size, not the size
+ * of @addr -- presumably always 16 bytes for AF_INET6, to be confirmed.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .ifa.ifa_family = AF_INET6,
+ .ifa.ifa_index = ifi,
+ };
+ struct nlmsghdr *nh;
+ bool found = false;
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWADDR) {
+ struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+ struct rtattr *rta;
+ size_t na;
+
+ if (ifa->ifa_index != ifi || ifa->ifa_scope != RT_SCOPE_LINK ||
+ found)
+ continue;
+
+ for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
+ rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != IFA_ADDRESS)
+ continue;
+
+ if (!found) {
+ memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ found = true;
+ }
+ }
+ }
+ return status;
+}
+
+/**
+ * nl_addr_set() - Set IP addresses for given interface and address family
* @s: Netlink socket
* @ifi: Interface index
* @af: Address family
@@ -779,10 +951,13 @@ int nl_addr_dup(int s_src, unsigned int ifi_src,
ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
if (rc < 0 || ifa->ifa_scope == RT_SCOPE_LINK ||
- ifa->ifa_index != ifi_src)
+ ifa->ifa_index != ifi_src ||
+ ifa->ifa_flags & IFA_F_DEPRECATED)
continue;
ifa->ifa_index = ifi_dst;
+ /* Same as nl_addr_set(), but here it's more than a default */
+ ifa->ifa_flags |= IFA_F_NODAD;
for (rta = IFA_RTA(ifa), na = IFA_PAYLOAD(nh); RTA_OK(rta, na);
rta = RTA_NEXT(rta, na)) {
@@ -790,6 +965,10 @@ int nl_addr_dup(int s_src, unsigned int ifi_src,
if (rta->rta_type == IFA_LABEL ||
rta->rta_type == IFA_CACHEINFO)
rta->rta_type = IFA_UNSPEC;
+
+ /* If 32-bit flags are used, add IFA_F_NODAD there */
+ if (rta->rta_type == IFA_FLAGS)
+ *(uint32_t *)RTA_DATA(rta) |= IFA_F_NODAD;
}
rc = nl_do(s_dst, nh, RTM_NEWADDR,
@@ -845,7 +1024,6 @@ int nl_link_get_mac(int s, unsigned int ifi, void *mac)
/**
* nl_link_set_mac() - Set link MAC address
* @s: Netlink socket
- * @ns: Use netlink socket in namespace
* @ifi: Interface index
* @mac: MAC address to set
*
@@ -871,14 +1049,14 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac)
}
/**
- * nl_link_up() - Bring link up
+ * nl_link_set_mtu() - Set link MTU
* @s: Netlink socket
* @ifi: Interface index
- * @mtu: If non-zero, set interface MTU
+ * @mtu: Interface MTU
*
* Return: 0 on success, negative error code on failure
*/
-int nl_link_up(int s, unsigned int ifi, int mtu)
+int nl_link_set_mtu(int s, unsigned int ifi, int mtu)
{
struct req_t {
struct nlmsghdr nlh;
@@ -888,17 +1066,35 @@ int nl_link_up(int s, unsigned int ifi, int mtu)
} req = {
.ifm.ifi_family = AF_UNSPEC,
.ifm.ifi_index = ifi,
- .ifm.ifi_flags = IFF_UP,
- .ifm.ifi_change = IFF_UP,
.rta.rta_type = IFLA_MTU,
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
.mtu = mtu,
};
- ssize_t len = sizeof(req);
- if (!mtu)
- /* Shorten request to drop MTU attribute */
- len = offsetof(struct req_t, rta);
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
+/**
+ * nl_link_set_flags() - Set link flags
+ * @s: Netlink socket
+ * @ifi: Interface index
+ * @set: Device flags to set
+ * @change: Mask of device flag changes
+ *
+ * Send an RTM_NEWLINK request for @ifi carrying only the link header, with
+ * @set as ifi_flags and @change as ifi_change, and no attributes. For
+ * instance, passing IFF_UP as both @set and @change asks for the link to be
+ * brought up.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_link_set_flags(int s, unsigned int ifi,
+ unsigned int set, unsigned int change)
+{
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .ifm.ifi_family = AF_UNSPEC,
+ .ifm.ifi_index = ifi,
+ .ifm.ifi_flags = set,
+ .ifm.ifi_change = change,
+ };
- return nl_do(s, &req, RTM_NEWLINK, 0, len);
+ return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
}