diff options
Diffstat (limited to 'netlink.c')
-rw-r--r-- | netlink.c | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/netlink.c b/netlink.c new file mode 100644 index 0000000..ca2e77f --- /dev/null +++ b/netlink.c @@ -0,0 +1,514 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * netlink.c - rtnetlink routines: interfaces, addresses, routes + * + * Copyright (c) 2020-2021 Red Hat GmbH + * Author: Stefano Brivio <sbrivio@redhat.com> + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <string.h> +#include <stddef.h> +#include <errno.h> +#include <sys/types.h> +#include <limits.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <linux/if_ether.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include "util.h" +#include "passt.h" +#include "netlink.h" + +/* Socket in init, in target namespace, sequence (just needs to be monotonic) */ +static int nl_sock = -1; +static int nl_sock_ns = -1; +static int nl_seq; + +/** + * __nl_sock_init() - Set up netlink sockets in init and target namespace + * @arg: Execution context + * + * Return: 0 + */ +static int __nl_sock_init(void *arg) +{ + struct sockaddr_nl addr = { .nl_family = AF_NETLINK, }; + struct ctx *c = (struct ctx *)arg; + int *s = &nl_sock, v = 1; + +ns: + if (((*s) = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0 || + bind(*s, (struct sockaddr *)&addr, sizeof(addr)) || + setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &v, sizeof(v))) + *s = -1; + + if (*s == -1 || !c || s == &nl_sock_ns) + return 0; + + ns_enter((struct ctx *)arg); + s = &nl_sock_ns; + goto ns; +} + +/** + * nl_sock_init() - Call __nl_sock_init() and check for failures + * @c: Execution context + * + * Return: -EIO if sockets couldn't be set up, 0 otherwise + */ +int nl_sock_init(struct ctx *c) +{ + if (c->mode == MODE_PASTA) { + NS_CALL(__nl_sock_init, c); + if (nl_sock_ns == -1) + return -EIO; + } else { + __nl_sock_init(NULL); + } + + if (nl_sock == -1) + return -EIO; + + return 0; +} + +/** + * nl_req() - Send netlink request and read response + * @ns: Use netlink socket in namespace + * @buf: Buffer for response (at least BUFSIZ long) + * @req: Request with netlink header + * @len: Request length + * + * Return: received length on success, negative error code on failure + */ +static int nl_req(int ns, char *buf, void *req, ssize_t len) +{ + int n, s = ns ? nl_sock_ns : nl_sock, done = 0; + char flush[BUFSIZ]; + + while (!done && (n = recv(s, flush, sizeof(flush), MSG_DONTWAIT)) > 0) { + struct nlmsghdr *nh = (struct nlmsghdr *)flush; + + for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) { + if (nh->nlmsg_type == NLMSG_DONE || + nh->nlmsg_type == NLMSG_ERROR) { + done = 1; + break; + } + } + } + + if ((send(s, req, len, 0) < len) || (len = recv(s, buf, BUFSIZ, 0)) < 0) + return -errno; + + return len; +} + +/** + * nl_get_ext_if() - Get interface index supporting IP versions being probed + * @v4: Probe IPv4 support, set to ENABLED or DISABLED on return + * @v6: Probe IPv4 support, set to ENABLED or DISABLED on return + * + * Return: interface index, 0 if not found + */ +unsigned int nl_get_ext_if(int *v4, int *v6) +{ + struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = { + .nlh.nlmsg_type = RTM_GETROUTE, + .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL, + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nlh.nlmsg_seq = nl_seq++, + + .rtm.rtm_table = RT_TABLE_MAIN, + .rtm.rtm_scope = RT_SCOPE_UNIVERSE, + .rtm.rtm_type = RTN_UNICAST, + }; + unsigned int i, first_v4 = 0, first_v6 = 0; + uint8_t has_v4[PAGE_SIZE * 8 / 8] = { 0 }; /* See __dev_alloc_name() */ + uint8_t has_v6[PAGE_SIZE * 8 / 8] = { 0 }; /* in kernel */ + struct nlmsghdr *nh; + struct rtattr *rta; + struct rtmsg *rtm; + char buf[BUFSIZ]; + long *word, tmp; + int n, na, *v; + uint8_t *vmap; + + if (*v4 == IP_VERSION_PROBE) { + v = v4; + req.rtm.rtm_family = AF_INET; + vmap = has_v4; + } else if (*v6 == IP_VERSION_PROBE) { +v6: + v = v6; + req.rtm.rtm_family = AF_INET6; + vmap = has_v6; + } else { + return 0; + } + + n = nl_req(0, buf, &req, sizeof(req)); + nh = (struct nlmsghdr *)buf; + + for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) { + rtm = (struct rtmsg *)NLMSG_DATA(nh); + + if (rtm->rtm_dst_len || rtm->rtm_family != req.rtm.rtm_family) + continue; + + for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh); + RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + unsigned int ifi; + + if (rta->rta_type != RTA_OIF) + continue; + + ifi = *(unsigned int *)RTA_DATA(rta); + + if (*v4 == IP_VERSION_DISABLED || + *v6 == IP_VERSION_DISABLED) { + *v = IP_VERSION_ENABLED; + return ifi; + } + + if (v == v4 && !first_v4) + first_v4 = ifi; + + if (v == v6 && !first_v6) + first_v6 = ifi; + + bitmap_set(vmap, ifi); + } + } + + if (v == v4 && *v6 == IP_VERSION_PROBE) { + req.nlh.nlmsg_seq = nl_seq++; + goto v6; + } + + word = (long *)has_v4; + for (i = 0; i < ARRAY_SIZE(has_v4) / sizeof(long); i++, word++) { + int ifi; + + tmp = *word; + while ((n = ffsl(tmp))) { + ifi = i * sizeof(long) * 8 + n - 1; + if (!first_v4) + first_v4 = ifi; + + tmp &= ~(1UL << (n - 1)); + if (bitmap_isset(has_v6, ifi)) { + *v4 = *v6 = IP_VERSION_ENABLED; + return ifi; + } + } + } + + if (first_v4) { + *v4 = IP_VERSION_ENABLED; + *v6 = IP_VERSION_DISABLED; + return first_v4; + } + + if (first_v6) { + *v4 = IP_VERSION_ENABLED; + *v6 = IP_VERSION_DISABLED; + return first_v6; + } + + err("No external routable interface for any IP protocol"); + return 0; +} + +/** + * nl_route() - Get/set default gateway for given interface and address family + * @ns: Use netlink socket in namespace + * @ifi: Interface index + * @af: Address family + * @gw: Default gateway to fill if zero, to set if not + */ +void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw) +{ + int set = (af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(gw)) || + (af == AF_INET && *(uint32_t *)gw); + struct req_t { + struct nlmsghdr nlh; + struct rtmsg rtm; + struct rtattr rta; + unsigned int ifi; + union { + struct { + struct rtattr rta_dst; + struct in6_addr d; + struct rtattr rta_gw; + struct in6_addr a; + } r6; + struct { + struct rtattr rta_dst; + uint32_t d; + struct rtattr rta_gw; + uint32_t a; + uint8_t end; + } r4; + }; + } req = { + .nlh.nlmsg_type = set ? RTM_NEWROUTE : RTM_GETROUTE, + .nlh.nlmsg_flags = NLM_F_REQUEST, + .nlh.nlmsg_seq = nl_seq++, + + .rtm.rtm_family = af, + .rtm.rtm_table = RT_TABLE_MAIN, + .rtm.rtm_scope = RT_SCOPE_UNIVERSE, + .rtm.rtm_type = RTN_UNICAST, + + .rta.rta_type = RTA_OIF, + .rta.rta_len = RTA_LENGTH(sizeof(unsigned int)), + .ifi = ifi, + }; + struct nlmsghdr *nh; + struct rtattr *rta; + struct rtmsg *rtm; + char buf[BUFSIZ]; + int n, na; + + if (set) { + if (af == AF_INET6) { + req.nlh.nlmsg_len = sizeof(req); + + req.r6.rta_dst.rta_type = RTA_DST; + req.r6.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r6.d)); + + memcpy(&req.r6.a, gw, sizeof(req.r6.a)); + req.r6.rta_gw.rta_type = RTA_GATEWAY; + req.r6.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r6.a)); + } else { + req.nlh.nlmsg_len = offsetof(struct req_t, r4.end); + + req.r4.rta_dst.rta_type = RTA_DST; + req.r4.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r4.d)); + + req.r4.a = *(uint32_t *)gw; + req.r4.rta_gw.rta_type = RTA_GATEWAY; + req.r4.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r4.a)); + } + + req.rtm.rtm_protocol = RTPROT_BOOT; + req.nlh.nlmsg_flags |= NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + } else { + req.nlh.nlmsg_len = offsetof(struct req_t, r6); + req.nlh.nlmsg_flags |= NLM_F_DUMP; + } + + n = nl_req(ns, buf, &req, req.nlh.nlmsg_len); + if (set) + return; + + nh = (struct nlmsghdr *)buf; + for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) { + if (nh->nlmsg_type != RTM_NEWROUTE) + goto next; + + rtm = (struct rtmsg *)NLMSG_DATA(nh); + if (rtm->rtm_dst_len) + continue; + + for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh); + RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + if (rta->rta_type != RTA_GATEWAY) + continue; + + memcpy(gw, RTA_DATA(rta), RTA_PAYLOAD(rta)); + return; + } + +next: + if (nh->nlmsg_type == NLMSG_DONE) + break; + } +} + +/** + * nl_addr() - Get/set IP addresses + * @ns: Use netlink socket in namespace + * @ifi: Interface index + * @af: Address family + * @addr: Global address to fill if zero, to set if not, ignored if NULL + * @addr_l: Link-scoped address to fill, NULL if not requested + */ +void nl_addr(int ns, unsigned int ifi, sa_family_t af, + void *addr, int prefix_len, void *addr_l) +{ + int set = addr && ((af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(addr)) || + (af == AF_INET && *(uint32_t *)addr)); + struct req_t { + struct nlmsghdr nlh; + struct ifaddrmsg ifa; + union { + struct { + struct rtattr rta_l; + uint32_t l; + struct rtattr rta_a; + uint32_t a; + + uint8_t end; + } a4; + struct { + struct rtattr rta_l; + struct in6_addr l; + struct rtattr rta_a; + struct in6_addr a; + } a6; + }; + } req = { + .nlh.nlmsg_type = set ? RTM_NEWADDR : RTM_GETADDR, + .nlh.nlmsg_flags = NLM_F_REQUEST, + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + .nlh.nlmsg_seq = nl_seq++, + + .ifa.ifa_family = af, + .ifa.ifa_index = ifi, + .ifa.ifa_prefixlen = prefix_len, + }; + struct ifaddrmsg *ifa; + struct nlmsghdr *nh; + struct rtattr *rta; + char buf[BUFSIZ]; + int n, na; + + if (set) { + if (af == AF_INET6) { + req.nlh.nlmsg_len = sizeof(req); + + memcpy(&req.a6.l, addr, sizeof(req.a6.l)); + req.a6.rta_l.rta_len = RTA_LENGTH(sizeof(req.a6.l)); + req.a4.rta_l.rta_type = IFA_LOCAL; + memcpy(&req.a6.a, addr, sizeof(req.a6.a)); + req.a6.rta_a.rta_len = RTA_LENGTH(sizeof(req.a6.a)); + req.a6.rta_a.rta_type = IFA_ADDRESS; + } else { + req.nlh.nlmsg_len = offsetof(struct req_t, a4.end); + + req.a4.l = req.a4.a = *(uint32_t *)addr; + req.a4.rta_l.rta_len = RTA_LENGTH(sizeof(req.a4.l)); + req.a4.rta_l.rta_type = IFA_LOCAL; + req.a4.rta_a.rta_len = RTA_LENGTH(sizeof(req.a4.a)); + req.a4.rta_a.rta_type = IFA_ADDRESS; + } + + req.ifa.ifa_scope = RT_SCOPE_UNIVERSE; + req.nlh.nlmsg_flags |= NLM_F_CREATE | NLM_F_ACK | NLM_F_EXCL; + } else { + req.nlh.nlmsg_flags |= NLM_F_DUMP; + } + + n = nl_req(ns, buf, &req, req.nlh.nlmsg_len); + if (set) + return; + + nh = (struct nlmsghdr *)buf; + for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) { + if (nh->nlmsg_type != RTM_NEWADDR) + goto next; + + ifa = (struct ifaddrmsg *)NLMSG_DATA(nh); + + for (rta = (struct rtattr *)IFA_RTA(ifa), na = RTM_PAYLOAD(nh); + RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + if (rta->rta_type != IFA_ADDRESS) + continue; + + if (af == AF_INET && addr && !*(uint32_t *)addr) + memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta)); + else if (af == AF_INET6 && addr && + ifa->ifa_scope == RT_SCOPE_UNIVERSE && + IN6_IS_ADDR_UNSPECIFIED(addr)) + memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta)); + + if (addr_l && + af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK && + IN6_IS_ADDR_UNSPECIFIED(addr_l)) + memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta)); + } +next: + if (nh->nlmsg_type == NLMSG_DONE) + break; + } +} + +/** + * nl_link() - Get/set link attributes + * @ns: Use netlink socket in namespace + * @ifi: Interface index + * @mac: MAC address to fill, if passed as zero, to set otherwise + * @up: If set, bring up the link + */ +void nl_link(int ns, unsigned int ifi, void *mac, int up) +{ + int change = !MAC_IS_ZERO(mac) || up; + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + struct rtattr rta; + unsigned char mac[ETH_ALEN]; + } req = { + .nlh.nlmsg_type = change ? RTM_NEWLINK : RTM_GETLINK, + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_flags = NLM_F_REQUEST | (change ? NLM_F_ACK : 0), + .nlh.nlmsg_seq = nl_seq++, + .ifm.ifi_family = AF_UNSPEC, + .ifm.ifi_index = ifi, + .ifm.ifi_flags = up ? IFF_UP : 0, + .ifm.ifi_change = up ? IFF_UP : 0, + + .rta.rta_type = IFLA_ADDRESS, + .rta.rta_len = RTA_LENGTH(ETH_ALEN), + }; + struct ifinfomsg *ifm; + struct nlmsghdr *nh; + struct rtattr *rta; + char buf[BUFSIZ]; + int n, na; + + if (!MAC_IS_ZERO(mac)) { + req.nlh.nlmsg_len = sizeof(req); + memcpy(req.mac, mac, ETH_ALEN); + } + + n = nl_req(ns, buf, &req, req.nlh.nlmsg_len); + + if (!MAC_IS_ZERO(mac) || up) + return; + + nh = (struct nlmsghdr *)buf; + for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) { + if (nh->nlmsg_type != RTM_NEWLINK) + goto next; + + ifm = (struct ifinfomsg *)NLMSG_DATA(nh); + + for (rta = (struct rtattr *)IFLA_RTA(ifm), na = RTM_PAYLOAD(nh); + RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + if (rta->rta_type != IFLA_ADDRESS) + continue; + + memcpy(mac, RTA_DATA(rta), ETH_ALEN); + break; + } +next: + if (nh->nlmsg_type == NLMSG_DONE) + break; + } +} |