aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--Makefile9
-rw-r--r--conf.c448
-rw-r--r--netlink.c514
-rw-r--r--netlink.h6
-rw-r--r--passt.111
-rw-r--r--passt.c183
-rw-r--r--passt.h8
-rw-r--r--tap.c120
-rw-r--r--tcp.c2
-rw-r--r--util.c2
-rw-r--r--util.h9
11 files changed, 703 insertions, 609 deletions
diff --git a/Makefile b/Makefile
index 8a1b3c5..9f0e3bf 100644
--- a/Makefile
+++ b/Makefile
@@ -15,11 +15,12 @@ static: clean all
passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
- siphash.c siphash.h tap.c tap.h icmp.c icmp.h tcp.c tcp.h \
- udp.c udp.h util.c util.h
+ netlink.c netlink.h pasta.c pasta.h siphash.c siphash.h tap.c tap.h \
+ icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
$(CC) $(CFLAGS) \
- passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c \
- pcap.c ndp.c siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt
+ passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c pasta.c pcap.c \
+ ndp.c netlink.c siphash.c tap.c icmp.c tcp.c udp.c util.c \
+ -o passt
pasta: passt
ln -s passt pasta
diff --git a/conf.c b/conf.c
index e020417..6399fbb 100644
--- a/conf.c
+++ b/conf.c
@@ -17,7 +17,6 @@
#include <getopt.h>
#include <string.h>
#include <errno.h>
-#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -38,6 +37,8 @@
#include "passt.h"
#include "udp.h"
#include "tcp.h"
+#include "netlink.h"
+#include "pasta.h"
/**
* get_bound_ports() - Get maps of ports with bound sockets
@@ -267,301 +268,6 @@ overlap:
}
/**
- * nl_req() - Send netlink request and read response, doesn't return on failure
- * @buf: Buffer for response (BUFSIZ long)
- * @req: Request with netlink header
- * @len: Request length
- */
-static void nl_req(char *buf, void *req, ssize_t len)
-{
- int s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE), v = 1;
- struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
-
- if (s < 0 ||
- setsockopt(s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &v, sizeof(v)) ||
- bind(s, (struct sockaddr *)&addr, sizeof(addr)) ||
- (send(s, req, len, 0) < len) ||
- (recv(s, buf, BUFSIZ, 0) < 0)) {
- perror("netlink recv");
- exit(EXIT_FAILURE);
- }
-
- close(s);
-}
-
-/**
- * get_routes() - Get default route and fill in routable interface name
- * @c: Execution context
- */
-static void get_routes(struct ctx *c)
-{
- struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = {
- .nlh.nlmsg_type = RTM_GETROUTE,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
- .nlh.nlmsg_seq = 1,
- .rtm.rtm_family = AF_INET,
- .rtm.rtm_table = RT_TABLE_MAIN,
- .rtm.rtm_scope = RT_SCOPE_UNIVERSE,
- .rtm.rtm_type = RTN_UNICAST,
- };
- char ifn[IFNAMSIZ], buf[BUFSIZ];
- struct nlmsghdr *nh;
- struct rtattr *rta;
- struct rtmsg *rtm;
- int n, na, v4, v6;
-
- if (!c->v4 && !c->v6)
- v4 = v6 = -1;
- else
- v6 = -!(v4 = -c->v4);
-
-v6:
- nl_req(buf, &req, sizeof(req));
- nh = (struct nlmsghdr *)buf;
-
- for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
- rtm = (struct rtmsg *)NLMSG_DATA(nh);
-
- if (rtm->rtm_dst_len ||
- (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6))
- continue;
-
- /* Filter on interface only if already given */
- if (*c->ifn) {
- *ifn = 0;
- for (rta = (struct rtattr *)RTM_RTA(rtm),
- na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != RTA_OIF)
- continue;
-
- if_indextoname(*(unsigned *)RTA_DATA(rta), ifn);
- break;
- }
-
- if (strcmp(ifn, c->ifn))
- goto next;
- }
-
- for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (!*c->ifn && rta->rta_type == RTA_OIF)
- if_indextoname(*(unsigned *)RTA_DATA(rta), ifn);
-
- if (v4 && rta->rta_type == RTA_GATEWAY &&
- rtm->rtm_family == AF_INET) {
- if (!c->gw4) {
- memcpy(&c->gw4, RTA_DATA(rta),
- sizeof(c->gw4));
- }
- v4 = 1;
- }
-
- if (v6 && rta->rta_type == RTA_GATEWAY &&
- rtm->rtm_family == AF_INET6) {
- if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6)) {
- memcpy(&c->gw6, RTA_DATA(rta),
- sizeof(c->gw6));
- }
- v6 = 1;
- }
- }
-
-next:
- if (nh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (v6 < 0 && req.rtm.rtm_family == AF_INET) {
- req.rtm.rtm_family = AF_INET6;
- req.nlh.nlmsg_seq++;
- goto v6;
- } else if (v6 < 0) {
- v6 = 0;
- }
-
- if ((v4 <= 0 && v6 <= 0) || (!*c->ifn && !*ifn)) {
- err("No routing information");
- exit(EXIT_FAILURE);
- }
-
- if (!*c->ifn)
- strncpy(c->ifn, ifn, IFNAMSIZ);
- c->v4 = v4;
- c->v6 = v6;
-}
-
-/**
- * get_l3_addrs() - Fetch IP addresses of external routable interface
- * @c: Execution context
- */
-static void get_l3_addrs(struct ctx *c)
-{
- struct { struct nlmsghdr nlh; struct ifaddrmsg ifa; } req = {
- .nlh.nlmsg_type = RTM_GETADDR,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
- .nlh.nlmsg_seq = 1,
- .ifa.ifa_family = AF_INET,
- .ifa.ifa_index = if_nametoindex(c->ifn),
- };
- struct ifaddrmsg *ifa;
- struct nlmsghdr *nh;
- struct rtattr *rta;
- int n, na, v4, v6;
- char buf[BUFSIZ];
-
- if (c->v4) {
- v4 = -1;
- if ((c->addr4_seen = c->addr4))
- v4 = 1;
- }
-
- if (c->v6) {
- v6 = -2;
- if (!IN6_IS_ADDR_UNSPECIFIED(&c->addr6)) {
- memcpy(&c->addr6_seen, &c->addr6, sizeof(c->addr6));
- memcpy(&c->addr6_ll_seen, &c->addr6, sizeof(c->addr6));
- v6 = -1;
- }
- }
-
-next_v:
- if (v4 < 0)
- req.ifa.ifa_family = AF_INET;
- else if (v6 < 0)
- req.ifa.ifa_family = AF_INET6;
- else
- goto mask_only;
-
- nl_req(buf, &req, sizeof(req));
- nh = (struct nlmsghdr *)buf;
-
- for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
- if (nh->nlmsg_type != RTM_NEWADDR)
- goto next;
-
- ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
-
- for (rta = (struct rtattr *)IFA_RTA(ifa), na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFA_ADDRESS)
- continue;
-
- if (v4 < 0) {
- memcpy(&c->addr4, RTA_DATA(rta),
- sizeof(c->addr4));
- memcpy(&c->addr4_seen, RTA_DATA(rta),
- sizeof(c->addr4_seen));
- v4 = 1;
- } else if (v6 < 0) {
- if (v6 == -2 &&
- ifa->ifa_scope == RT_SCOPE_UNIVERSE) {
- memcpy(&c->addr6, RTA_DATA(rta),
- sizeof(c->addr6));
- memcpy(&c->addr6_seen, RTA_DATA(rta),
- sizeof(c->addr6_seen));
- memcpy(&c->addr6_ll_seen, RTA_DATA(rta),
- sizeof(c->addr6_ll_seen));
- } else if (ifa->ifa_scope == RT_SCOPE_LINK) {
- memcpy(&c->addr6_ll, RTA_DATA(rta),
- sizeof(c->addr6_ll));
- }
- if (!IN6_IS_ADDR_UNSPECIFIED(&c->addr6) &&
- !IN6_IS_ADDR_UNSPECIFIED(&c->addr6_ll))
- v6 = 1;
- }
- }
-next:
- if (nh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (v4 >= 0 && v6 < 0)
- goto next_v;
-
- if (v4 < c->v4 || v6 < c->v6)
- goto out;
-
-mask_only:
- if (v4 && !c->mask4) {
- if (IN_CLASSA(ntohl(c->addr4)))
- c->mask4 = htonl(IN_CLASSA_NET);
- else if (IN_CLASSB(ntohl(c->addr4)))
- c->mask4 = htonl(IN_CLASSB_NET);
- else if (IN_CLASSC(ntohl(c->addr4)))
- c->mask4 = htonl(IN_CLASSC_NET);
- else
- c->mask4 = 0xffffffff;
- }
-
- return;
-out:
- err("Couldn't get addresses for routable interface");
- exit(EXIT_FAILURE);
-}
-
-/**
- * get_l2_addr() - Fetch hardware addresses of external routable interface
- * @c: Execution context
- */
-static void get_l2_addr(struct ctx *c)
-{
- struct { struct nlmsghdr nlh; struct ifinfomsg ifi; } req = {
- .nlh.nlmsg_type = RTM_GETLINK,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP_FILTERED,
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- .nlh.nlmsg_seq = 1,
- .ifi.ifi_family = AF_UNSPEC,
- .ifi.ifi_index = if_nametoindex(c->ifn),
- };
- struct ifinfomsg *ifi;
- struct nlmsghdr *nh;
- struct rtattr *rta;
- char buf[BUFSIZ];
- int n, na;
-
- if (memcmp(c->mac, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN))
- goto mac_guest;
-
- nl_req(buf, &req, sizeof(req));
- nh = (struct nlmsghdr *)buf;
-
- for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
- if (nh->nlmsg_type != RTM_NEWLINK)
- goto next;
-
- ifi = (struct ifinfomsg *)NLMSG_DATA(nh);
-
- for (rta = (struct rtattr *)IFLA_RTA(ifi), na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFLA_ADDRESS)
- continue;
-
- memcpy(c->mac, RTA_DATA(rta), ETH_ALEN);
- break;
- }
-next:
- if (nh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (!memcmp(c->mac, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN))
- goto out;
-
-mac_guest:
- if (memcmp(c->mac_guest, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN))
- memset(&c->mac_guest, 0xff, sizeof(c->mac_guest));
-
- return;
-
-out:
- err("Couldn't get hardware address for routable interface");
- exit(EXIT_FAILURE);
-}
-
-/**
* get_dns() - Get nameserver addresses from local /etc/resolv.conf
* @c: Execution context
*/
@@ -731,6 +437,91 @@ static int conf_ns_opt(struct ctx *c,
}
/**
+ * conf_ip() - Verify or detect IPv4/IPv6 support, get relevant addresses
+ * @c: Execution context
+ */
+static void conf_ip(struct ctx *c)
+{
+	int v4, v6;
+
+	/* A version explicitly requested by the user is the only one probed
+	 * (IPv4 takes precedence if both flags are set); with no request,
+	 * both versions are probed.
+	 */
+	if (c->v4) {
+		c->v4 = IP_VERSION_ENABLED;
+		v4 = IP_VERSION_PROBE;
+		v6 = c->v6 = IP_VERSION_DISABLED;
+	} else if (c->v6) {
+		c->v6 = IP_VERSION_ENABLED;
+		v6 = IP_VERSION_PROBE;
+		v4 = c->v4 = IP_VERSION_DISABLED;
+	} else {
+		c->v4 = c->v6 = IP_VERSION_ENABLED;
+		v4 = v6 = IP_VERSION_PROBE;
+	}
+
+	/* No interface given on the command line: pick one from routes */
+	if (!c->ifi)
+		c->ifi = nl_get_ext_if(&v4, &v6);
+
+	if (v4 != IP_VERSION_DISABLED) {
+		/* Only fetch what wasn't configured explicitly */
+		if (!c->gw4)
+			nl_route(0, c->ifi, AF_INET, &c->gw4);
+
+		if (!c->addr4) {
+			nl_addr(0, c->ifi, AF_INET, &c->addr4, 0, NULL);
+			/* Fall back to a classful netmask if none is given */
+			if (!c->mask4) {
+				if (IN_CLASSA(ntohl(c->addr4)))
+					c->mask4 = htonl(IN_CLASSA_NET);
+				else if (IN_CLASSB(ntohl(c->addr4)))
+					c->mask4 = htonl(IN_CLASSB_NET);
+				else if (IN_CLASSC(ntohl(c->addr4)))
+					c->mask4 = htonl(IN_CLASSC_NET);
+				else
+					c->mask4 = 0xffffffff;
+			}
+		}
+
+		memcpy(&c->addr4_seen, &c->addr4, sizeof(c->addr4_seen));
+
+		if (!memcmp(c->mac, MAC_ZERO, ETH_ALEN))
+			nl_link(0, c->ifi, c->mac, 0);
+	}
+
+	if (c->mode == MODE_PASST)
+		memset(&c->mac_guest, 0xff, sizeof(c->mac_guest));
+
+	if (v6 != IP_VERSION_DISABLED) {
+		if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6))
+			nl_route(0, c->ifi, AF_INET6, &c->gw6);
+
+		/* Passing NULL keeps a user-configured global address as it
+		 * is: nl_addr() would otherwise try to set it on the interface
+		 */
+		nl_addr(0, c->ifi, AF_INET6,
+			IN6_IS_ADDR_UNSPECIFIED(&c->addr6) ? &c->addr6 : NULL,
+			0, &c->addr6_ll);
+
+		/* Sizes must be the ones of the IPv6 destinations: using the
+		 * IPv4 field size here would copy 4 bytes out of 16 only. Both
+		 * "seen" addresses start from the global address.
+		 */
+		memcpy(&c->addr6_seen, &c->addr6, sizeof(c->addr6_seen));
+		memcpy(&c->addr6_ll_seen, &c->addr6, sizeof(c->addr6_ll_seen));
+	}
+
+	/* A version is usable only if gateway and addresses are all known */
+	if (!c->gw4 || !c->addr4 || !memcmp(c->mac, MAC_ZERO, ETH_ALEN))
+		v4 = IP_VERSION_DISABLED;
+	else
+		v4 = IP_VERSION_ENABLED;
+
+	if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&c->addr6) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&c->addr6_ll))
+		v6 = IP_VERSION_DISABLED;
+	else
+		v6 = IP_VERSION_ENABLED;
+
+	if ((v4 == IP_VERSION_DISABLED) && (v6 == IP_VERSION_DISABLED)) {
+		err("External interface not usable");
+		exit(EXIT_FAILURE);
+	}
+
+	c->v4 = v4;
+	c->v6 = v6;
+}
+
+/**
* usage() - Print usage and exit
* @name: Executable name
*/
@@ -868,20 +659,22 @@ pasta_opts:
info( " implied if PATH or NAME are given without --userns");
info( " --nsrun-dir Directory for nsfs mountpoints");
info( " default: " NETNS_RUN_DIR);
+ info( " --config-net Configure tap interface in namespace");
+ info( " --ns-mac-addr ADDR Set MAC address on tap interface");
exit(EXIT_FAILURE);
}
void conf_print(struct ctx *c)
{
- char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN];
+ char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN], ifn[IFNAMSIZ];
int i;
if (c->mode == MODE_PASTA) {
info("Outbound interface: %s, namespace interface: %s",
- c->ifn, c->pasta_ifn);
+ if_indextoname(c->ifi, ifn), c->pasta_ifn);
} else {
- info("Outbound interface: %s", c->ifn);
+ info("Outbound interface: %s", if_indextoname(c->ifi, ifn));
}
if (c->v4) {
@@ -991,6 +784,8 @@ void conf(struct ctx *c, int argc, char **argv)
{"userns", required_argument, NULL, 2 },
{"netns-only", no_argument, &c->netns_only, 1 },
{"nsrun-dir", required_argument, NULL, 3 },
+ {"config-net", no_argument, &c->pasta_conf_ns, 1 },
+ {"ns-mac-addr", required_argument, NULL, 4 },
{ 0 },
};
struct get_bound_ports_ns_arg ns_ports_arg = { .c = c };
@@ -1051,6 +846,22 @@ void conf(struct ctx *c, int argc, char **argv)
usage(argv[0]);
}
break;
+ case 4:
+ if (c->mode != MODE_PASTA) {
+ err("--ns-mac-addr is for pasta mode only");
+ usage(argv[0]);
+ }
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ errno = 0;
+ b = strtol(optarg + i * 3, NULL, 16);
+ if (b < 0 || b > UCHAR_MAX || errno) {
+ err("Invalid MAC address: %s", optarg);
+ usage(argv[0]);
+ }
+ c->mac_guest[i] = b;
+ }
+ break;
case 'd':
if (c->debug) {
err("Multiple --debug options given");
@@ -1217,12 +1028,16 @@ void conf(struct ctx *c, int argc, char **argv)
usage(argv[0]);
break;
case 'i':
- if (*c->ifn) {
+ if (c->ifi) {
err("Redundant interface: %s", optarg);
usage(argv[0]);
}
- strncpy(c->ifn, optarg, IFNAMSIZ - 1);
+ if (!(c->ifi = if_nametoindex(optarg))) {
+ err("Invalid interface name %s: %s", optarg,
+ strerror(errno));
+ usage(argv[0]);
+ }
break;
case 'D':
if (c->no_dns ||
@@ -1320,25 +1135,26 @@ void conf(struct ctx *c, int argc, char **argv)
usage(argv[0]);
}
- if (c->v4 || c->v6) {
- if (!c->v4)
- c->no_dhcp = 1;
+ if (c->mode == MODE_PASTA && !c->pasta_netns_fd)
+ pasta_start_ns(c);
- if (!c->v6) {
- c->no_ndp = 1;
- c->no_dhcpv6 = 1;
- }
+ if (nl_sock_init(c)) {
+ err("Failed to get netlink socket");
+ exit(EXIT_FAILURE);
}
- if (!c->mtu) {
- c->mtu = (ETH_MAX_MTU - ETH_HLEN) /
- sizeof(uint32_t) * sizeof(uint32_t);
+ conf_ip(c);
+
+ if (!c->v4)
+ c->no_dhcp = 1;
+
+ if (!c->v6) {
+ c->no_ndp = 1;
+ c->no_dhcpv6 = 1;
}
- get_routes(c);
- get_l3_addrs(c);
- if (c->v4)
- get_l2_addr(c);
+ if (!c->mtu)
+ c->mtu = ROUND_DOWN(ETH_MAX_MTU - ETH_HLEN, sizeof(uint32_t));
if (c->mode == MODE_PASTA && dns4 == c->dns4 && dns6 == c->dns6)
c->no_dns = 1;
@@ -1347,7 +1163,7 @@ void conf(struct ctx *c, int argc, char **argv)
get_dns(c);
if (!*c->pasta_ifn)
- strncpy(c->pasta_ifn, c->ifn, IFNAMSIZ);
+ if_indextoname(c->ifi, c->pasta_ifn);
#ifdef PASST_LEGACY_NO_OPTIONS
if (c->mode == MODE_PASST) {
diff --git a/netlink.c b/netlink.c
new file mode 100644
index 0000000..ca2e77f
--- /dev/null
+++ b/netlink.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ * for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ * for network namespace/tap device mode
+ *
+ * netlink.c - rtnetlink routines: interfaces, addresses, routes
+ *
+ * Copyright (c) 2020-2021 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <string.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include "util.h"
+#include "passt.h"
+#include "netlink.h"
+
+/* Socket in init, in target namespace, sequence (just needs to be monotonic) */
+static int nl_sock = -1;
+static int nl_sock_ns = -1;
+static int nl_seq;
+
+/**
+ * __nl_sock_init() - Set up netlink sockets in init and target namespace
+ * @arg: Execution context
+ *
+ * Return: 0
+ */
+static int __nl_sock_init(void *arg)
+{
+	struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
+	struct ctx *c = (struct ctx *)arg;
+	int *s = &nl_sock, v = 1;
+
+	/* First pass sets up nl_sock, second pass (only with a context, and
+	 * only if the first one succeeded) sets up nl_sock_ns after entering
+	 * the namespace with ns_enter().
+	 */
+ns:
+	*s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (*s < 0) {
+		*s = -1;
+	} else if (bind(*s, (struct sockaddr *)&addr, sizeof(addr)) ||
+		   setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
+			      &v, sizeof(v))) {
+		/* Don't leak a descriptor we can't use */
+		close(*s);
+		*s = -1;
+	}
+
+	if (*s == -1 || !c || s == &nl_sock_ns)
+		return 0;
+
+	ns_enter(c);
+	s = &nl_sock_ns;
+	goto ns;
+}
+
+/**
+ * nl_sock_init() - Call __nl_sock_init() and check for failures
+ * @c: Execution context
+ *
+ * Return: -EIO if sockets couldn't be set up, 0 otherwise
+ */
+int nl_sock_init(struct ctx *c)
+{
+	if (c->mode != MODE_PASTA) {
+		/* Socket in the init namespace only */
+		__nl_sock_init(NULL);
+	} else {
+		/* Sockets in both init and target namespace */
+		NS_CALL(__nl_sock_init, c);
+		if (nl_sock_ns == -1)
+			return -EIO;
+	}
+
+	return nl_sock == -1 ? -EIO : 0;
+}
+
+/**
+ * nl_req() - Send netlink request and read response
+ * @ns: Use netlink socket in namespace
+ * @buf: Buffer for response (at least BUFSIZ long)
+ * @req: Request with netlink header
+ * @len: Request length
+ *
+ * Return: received length on success, negative error code on failure
+ */
+static int nl_req(int ns, char *buf, void *req, ssize_t len)
+{
+	int s = ns ? nl_sock_ns : nl_sock;
+	char flush[BUFSIZ];
+	int done = 0, n;
+
+	/* Discard, without blocking, anything still queued on the socket, up
+	 * to and including the first NLMSG_DONE or NLMSG_ERROR message
+	 */
+	while (!done) {
+		struct nlmsghdr *nh;
+
+		n = recv(s, flush, sizeof(flush), MSG_DONTWAIT);
+		if (n <= 0)
+			break;
+
+		for (nh = (struct nlmsghdr *)flush; NLMSG_OK(nh, n);
+		     nh = NLMSG_NEXT(nh, n)) {
+			if (nh->nlmsg_type == NLMSG_DONE ||
+			    nh->nlmsg_type == NLMSG_ERROR) {
+				done = 1;
+				break;
+			}
+		}
+	}
+
+	if (send(s, req, len, 0) < len)
+		return -errno;
+
+	len = recv(s, buf, BUFSIZ, 0);
+	if (len < 0)
+		return -errno;
+
+	return len;
+}
+
+/**
+ * nl_get_ext_if() - Get interface index supporting IP versions being probed
+ * @v4:		Probe IPv4 support, set to ENABLED or DISABLED on return
+ * @v6:		Probe IPv6 support, set to ENABLED or DISABLED on return
+ *
+ * Return: interface index, 0 if not found
+ */
+unsigned int nl_get_ext_if(int *v4, int *v6)
+{
+	struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = {
+		.nlh.nlmsg_type = RTM_GETROUTE,
+		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
+		.nlh.nlmsg_seq = nl_seq++,
+
+		.rtm.rtm_table = RT_TABLE_MAIN,
+		.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
+		.rtm.rtm_type = RTN_UNICAST,
+	};
+	unsigned int i, first_v4 = 0, first_v6 = 0;
+	uint8_t has_v4[PAGE_SIZE * 8 / 8] = { 0 }; /* See __dev_alloc_name() */
+	uint8_t has_v6[PAGE_SIZE * 8 / 8] = { 0 }; /* in kernel */
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	struct rtmsg *rtm;
+	char buf[BUFSIZ];
+	long *word, tmp;
+	int n, na, *v;
+	uint8_t *vmap;
+
+	/* Start from IPv4 if it's being probed. The v6 label is also the
+	 * target of the jump taken below once the IPv4 dump is processed.
+	 */
+	if (*v4 == IP_VERSION_PROBE) {
+		v = v4;
+		req.rtm.rtm_family = AF_INET;
+		vmap = has_v4;
+	} else if (*v6 == IP_VERSION_PROBE) {
+v6:
+		v = v6;
+		req.rtm.rtm_family = AF_INET6;
+		vmap = has_v6;
+	} else {
+		return 0;
+	}
+
+	n = nl_req(0, buf, &req, sizeof(req));
+	nh = (struct nlmsghdr *)buf;
+
+	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+		rtm = (struct rtmsg *)NLMSG_DATA(nh);
+
+		/* Default routes (zero-length destination) only */
+		if (rtm->rtm_dst_len || rtm->rtm_family != req.rtm.rtm_family)
+			continue;
+
+		for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
+		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			unsigned int ifi;
+
+			if (rta->rta_type != RTA_OIF)
+				continue;
+
+			ifi = *(unsigned int *)RTA_DATA(rta);
+
+			/* Single version probed: first match is enough */
+			if (*v4 == IP_VERSION_DISABLED ||
+			    *v6 == IP_VERSION_DISABLED) {
+				*v = IP_VERSION_ENABLED;
+				return ifi;
+			}
+
+			if (v == v4 && !first_v4)
+				first_v4 = ifi;
+
+			if (v == v6 && !first_v6)
+				first_v6 = ifi;
+
+			/* Indices not fitting the maps can still be picked as
+			 * first_v4 or first_v6, but mustn't be written past
+			 * the end of the on-stack bitmaps
+			 */
+			if (ifi < ARRAY_SIZE(has_v4) * 8)
+				bitmap_set(vmap, ifi);
+		}
+	}
+
+	if (v == v4 && *v6 == IP_VERSION_PROBE) {
+		req.nlh.nlmsg_seq = nl_seq++;
+		goto v6;
+	}
+
+	/* Prefer an interface with default routes for both versions */
+	word = (long *)has_v4;
+	for (i = 0; i < ARRAY_SIZE(has_v4) / sizeof(long); i++, word++) {
+		int ifi;
+
+		tmp = *word;
+		while ((n = ffsl(tmp))) {
+			ifi = i * sizeof(long) * 8 + n - 1;
+			if (!first_v4)
+				first_v4 = ifi;
+
+			tmp &= ~(1UL << (n - 1));
+			if (bitmap_isset(has_v6, ifi)) {
+				*v4 = *v6 = IP_VERSION_ENABLED;
+				return ifi;
+			}
+		}
+	}
+
+	if (first_v4) {
+		*v4 = IP_VERSION_ENABLED;
+		*v6 = IP_VERSION_DISABLED;
+		return first_v4;
+	}
+
+	/* IPv6-only interface: report IPv6, not IPv4, as the usable version */
+	if (first_v6) {
+		*v4 = IP_VERSION_DISABLED;
+		*v6 = IP_VERSION_ENABLED;
+		return first_v6;
+	}
+
+	err("No external routable interface for any IP protocol");
+	return 0;
+}
+
+/**
+ * nl_route() - Get/set default gateway for given interface and address family
+ * @ns: Use netlink socket in namespace
+ * @ifi: Interface index
+ * @af: Address family
+ * @gw: Default gateway to fill if zero, to set if not
+ */
+void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw)
+{
+ /* A non-zero @gw selects "set" mode, a zero one selects "get" mode */
+ int set = (af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(gw)) ||
+ (af == AF_INET && *(uint32_t *)gw);
+ /* Request layout: headers and RTA_OIF attribute first, followed, in
+ * set mode only, by destination and gateway attributes sized for the
+ * address family. How much of this is sent depends on nlmsg_len below.
+ */
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct rtmsg rtm;
+ struct rtattr rta;
+ unsigned int ifi;
+ union {
+ struct {
+ struct rtattr rta_dst;
+ struct in6_addr d;
+ struct rtattr rta_gw;
+ struct in6_addr a;
+ } r6;
+ struct {
+ struct rtattr rta_dst;
+ uint32_t d;
+ struct rtattr rta_gw;
+ uint32_t a;
+ /* Not sent: only marks where the IPv4 request ends */
+ uint8_t end;
+ } r4;
+ };
+ } req = {
+ .nlh.nlmsg_type = set ? RTM_NEWROUTE : RTM_GETROUTE,
+ .nlh.nlmsg_flags = NLM_F_REQUEST,
+ .nlh.nlmsg_seq = nl_seq++,
+
+ .rtm.rtm_family = af,
+ .rtm.rtm_table = RT_TABLE_MAIN,
+ .rtm.rtm_scope = RT_SCOPE_UNIVERSE,
+ .rtm.rtm_type = RTN_UNICAST,
+
+ .rta.rta_type = RTA_OIF,
+ .rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
+ .ifi = ifi,
+ };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rtm;
+ char buf[BUFSIZ];
+ int n, na;
+
+ if (set) {
+ /* Destination is left zeroed by the initialiser above, and
+ * rtm_dst_len is zero too: this adds a default route via @gw
+ */
+ if (af == AF_INET6) {
+ req.nlh.nlmsg_len = sizeof(req);
+
+ req.r6.rta_dst.rta_type = RTA_DST;
+ req.r6.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r6.d));
+
+ memcpy(&req.r6.a, gw, sizeof(req.r6.a));
+ req.r6.rta_gw.rta_type = RTA_GATEWAY;
+ req.r6.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r6.a));
+ } else {
+ req.nlh.nlmsg_len = offsetof(struct req_t, r4.end);
+
+ req.r4.rta_dst.rta_type = RTA_DST;
+ req.r4.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r4.d));
+
+ req.r4.a = *(uint32_t *)gw;
+ req.r4.rta_gw.rta_type = RTA_GATEWAY;
+ req.r4.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r4.a));
+ }
+
+ req.rtm.rtm_protocol = RTPROT_BOOT;
+ req.nlh.nlmsg_flags |= NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ } else {
+ /* Get mode: send headers and RTA_OIF only, as a dump request */
+ req.nlh.nlmsg_len = offsetof(struct req_t, r6);
+ req.nlh.nlmsg_flags |= NLM_F_DUMP;
+ }
+
+ n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
+ /* In set mode, the reply isn't inspected: failures are not reported */
+ if (set)
+ return;
+
+ /* Get mode: @gw is filled from the first RTA_GATEWAY attribute found
+ * in a RTM_NEWROUTE message with zero-length destination, and is left
+ * untouched if there's none in the received buffer
+ */
+ nh = (struct nlmsghdr *)buf;
+ for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+ if (nh->nlmsg_type != RTM_NEWROUTE)
+ goto next;
+
+ rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ if (rtm->rtm_dst_len)
+ continue;
+
+ for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
+ RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != RTA_GATEWAY)
+ continue;
+
+ memcpy(gw, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ return;
+ }
+
+next:
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ }
+}
+
+/**
+ * nl_addr() - Get/set IP addresses
+ * @ns:		Use netlink socket in namespace
+ * @ifi:	Interface index
+ * @af:		Address family
+ * @addr:	Global address to fill if zero, to set if not, ignored if NULL
+ * @prefix_len:	Prefix length, passed as ifa_prefixlen in the request
+ * @addr_l:	Link-scoped address to fill, NULL if not requested
+ */
+void nl_addr(int ns, unsigned int ifi, sa_family_t af,
+	     void *addr, int prefix_len, void *addr_l)
+{
+	/* A non-zero @addr selects "set" mode, zero or NULL "get" mode */
+	int set = addr && ((af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(addr)) ||
+			   (af == AF_INET && *(uint32_t *)addr));
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifaddrmsg ifa;
+		union {
+			struct {
+				struct rtattr rta_l;
+				uint32_t l;
+				struct rtattr rta_a;
+				uint32_t a;
+
+				/* Only marks where the IPv4 request ends */
+				uint8_t end;
+			} a4;
+			struct {
+				struct rtattr rta_l;
+				struct in6_addr l;
+				struct rtattr rta_a;
+				struct in6_addr a;
+			} a6;
+		};
+	} req = {
+		.nlh.nlmsg_type = set ? RTM_NEWADDR : RTM_GETADDR,
+		.nlh.nlmsg_flags = NLM_F_REQUEST,
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+		.nlh.nlmsg_seq = nl_seq++,
+
+		.ifa.ifa_family = af,
+		.ifa.ifa_index = ifi,
+		.ifa.ifa_prefixlen = prefix_len,
+	};
+	struct ifaddrmsg *ifa;
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	char buf[BUFSIZ];
+	int n, na;
+
+	if (set) {
+		/* The same address is used for IFA_LOCAL and IFA_ADDRESS */
+		if (af == AF_INET6) {
+			req.nlh.nlmsg_len = sizeof(req);
+
+			memcpy(&req.a6.l, addr, sizeof(req.a6.l));
+			req.a6.rta_l.rta_len = RTA_LENGTH(sizeof(req.a6.l));
+			req.a6.rta_l.rta_type = IFA_LOCAL;
+			memcpy(&req.a6.a, addr, sizeof(req.a6.a));
+			req.a6.rta_a.rta_len = RTA_LENGTH(sizeof(req.a6.a));
+			req.a6.rta_a.rta_type = IFA_ADDRESS;
+		} else {
+			req.nlh.nlmsg_len = offsetof(struct req_t, a4.end);
+
+			req.a4.l = req.a4.a = *(uint32_t *)addr;
+			req.a4.rta_l.rta_len = RTA_LENGTH(sizeof(req.a4.l));
+			req.a4.rta_l.rta_type = IFA_LOCAL;
+			req.a4.rta_a.rta_len = RTA_LENGTH(sizeof(req.a4.a));
+			req.a4.rta_a.rta_type = IFA_ADDRESS;
+		}
+
+		req.ifa.ifa_scope = RT_SCOPE_UNIVERSE;
+		req.nlh.nlmsg_flags |= NLM_F_CREATE | NLM_F_ACK | NLM_F_EXCL;
+	} else {
+		req.nlh.nlmsg_flags |= NLM_F_DUMP;
+	}
+
+	n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
+	/* In set mode, the reply isn't inspected */
+	if (set)
+		return;
+
+	nh = (struct nlmsghdr *)buf;
+	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+		if (nh->nlmsg_type != RTM_NEWADDR)
+			goto next;
+
+		ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+
+		for (rta = (struct rtattr *)IFA_RTA(ifa), na = RTM_PAYLOAD(nh);
+		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (rta->rta_type != IFA_ADDRESS)
+				continue;
+
+			/* Only still-unset outputs are filled, so the first
+			 * matching address of each kind is the one returned
+			 */
+			if (af == AF_INET && addr && !*(uint32_t *)addr)
+				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+			else if (af == AF_INET6 && addr &&
+				 ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
+				 IN6_IS_ADDR_UNSPECIFIED(addr))
+				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+
+			if (addr_l &&
+			    af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK &&
+			    IN6_IS_ADDR_UNSPECIFIED(addr_l))
+				memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta));
+		}
+next:
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+	}
+}
+
+/**
+ * nl_link() - Get/set link attributes
+ * @ns: Use netlink socket in namespace
+ * @ifi: Interface index
+ * @mac: MAC address to fill, if passed as zero, to set otherwise
+ * @up: If set, bring up the link
+ */
+void nl_link(int ns, unsigned int ifi, void *mac, int up)
+{
+	int set_mac = !MAC_IS_ZERO(mac);
+	int change = set_mac || up;
+	struct {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+		struct rtattr rta;
+		unsigned char mac[ETH_ALEN];
+	} req = {
+		.nlh.nlmsg_type = change ? RTM_NEWLINK : RTM_GETLINK,
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nlh.nlmsg_flags = NLM_F_REQUEST | (change ? NLM_F_ACK : 0),
+		.nlh.nlmsg_seq = nl_seq++,
+		.ifm.ifi_family = AF_UNSPEC,
+		.ifm.ifi_index = ifi,
+		.ifm.ifi_flags = up ? IFF_UP : 0,
+		.ifm.ifi_change = up ? IFF_UP : 0,
+
+		.rta.rta_type = IFLA_ADDRESS,
+		.rta.rta_len = RTA_LENGTH(ETH_ALEN),
+	};
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	char buf[BUFSIZ];
+	int n, na;
+
+	/* The address attribute is part of the message only if a MAC address
+	 * is given: otherwise, the length set above stops before it
+	 */
+	if (set_mac) {
+		req.nlh.nlmsg_len = sizeof(req);
+		memcpy(req.mac, mac, ETH_ALEN);
+	}
+
+	n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
+
+	/* Nothing to read back if this was a change request */
+	if (change)
+		return;
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, n);
+	     nh = NLMSG_NEXT(nh, n)) {
+		struct ifinfomsg *ifm;
+
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+
+		if (nh->nlmsg_type != RTM_NEWLINK)
+			continue;
+
+		ifm = (struct ifinfomsg *)NLMSG_DATA(nh);
+
+		/* Copy out the first hardware address attribute, if any */
+		rta = (struct rtattr *)IFLA_RTA(ifm);
+		na = RTM_PAYLOAD(nh);
+		while (RTA_OK(rta, na)) {
+			if (rta->rta_type == IFLA_ADDRESS) {
+				memcpy(mac, RTA_DATA(rta), ETH_ALEN);
+				break;
+			}
+
+			rta = RTA_NEXT(rta, na);
+		}
+	}
+}
diff --git a/netlink.h b/netlink.h
new file mode 100644
index 0000000..654e17e
--- /dev/null
+++ b/netlink.h
@@ -0,0 +1,6 @@
+int nl_sock_init(struct ctx *c);
+unsigned int nl_get_ext_if(int *v4, int *v6);
+void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw);
+void nl_addr(int ns, unsigned int ifi, sa_family_t af,
+ void *addr, int prefix_len, void *addr_l);
+void nl_link(int ns, unsigned int ifi, void *mac, int up);
diff --git a/passt.1 b/passt.1
index b21333b..df9892b 100644
--- a/passt.1
+++ b/passt.1
@@ -375,6 +375,17 @@ Directory for nsfs mountpoints, used as path prefix for names of namespaces.
The default path is shown with --help.
+.TP
+.BR \-\-config-net
+Configure networking in the namespace: set up addresses and routes as configured
+or sourced from the host, and bring up the tap interface.
+
+.TP
+.BR \-\-ns-mac-addr " " \fIaddr
+Configure MAC address \fIaddr\fR on the tap interface in the namespace.
+
+Default is to let the tap driver build a pseudorandom hardware address.
+
.SH EXAMPLES
.SS \fBpasta
diff --git a/passt.c b/passt.c
index e0519f6..b411657 100644
--- a/passt.c
+++ b/passt.c
@@ -62,6 +62,7 @@
#include "pcap.h"
#include "tap.h"
#include "conf.h"
+#include "pasta.h"
#define EPOLL_EVENTS 8
@@ -156,178 +157,6 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
udp_update_l2_buf(eth_d, eth_s, ip_da);
}
-static int pasta_child_pid;
-static char pasta_child_ns[PATH_MAX];
-
-/**
- * pasta_ns_cleanup() - Look for processes in namespace, terminate them
- */
-static void pasta_ns_cleanup(void)
-{
- char proc_path[PATH_MAX], ns_link[PATH_MAX];
- int recheck = 0, found = 0, waited = 0;
- struct dirent *dp;
- DIR *dirp;
-
- if (!*pasta_child_ns)
- return;
-
-loop:
- if (!(dirp = opendir("/proc")))
- return;
-
- while ((dp = readdir(dirp))) {
- pid_t pid;
-
- errno = 0;
- pid = strtol(dp->d_name, NULL, 0);
- if (!pid || errno)
- continue;
-
- snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid);
- if (readlink(proc_path, ns_link, PATH_MAX) < 0)
- continue;
-
- if (!strncmp(ns_link, pasta_child_ns, PATH_MAX)) {
- found = 1;
- if (waited)
- kill(pid, SIGKILL);
- else
- kill(pid, SIGQUIT);
- }
- }
-
- closedir(dirp);
-
- if (!found)
- return;
-
- if (waited) {
- if (recheck) {
- info("Some processes in namespace didn't quit");
- } else {
- found = 0;
- recheck = 1;
- goto loop;
- }
- return;
- }
-
- info("Waiting for all processes in namespace to terminate");
- sleep(1);
- waited = 1;
- goto loop;
-}
-
-/**
- * pasta_child_handler() - Exit once shell spawned by pasta_start_ns() exits
- * @signal: Unused, handler deals with SIGCHLD only
- */
-static void pasta_child_handler(int signal)
-{
- siginfo_t infop;
-
- (void)signal;
-
- if (pasta_child_pid &&
- !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
- if (infop.si_pid == pasta_child_pid) {
- pasta_ns_cleanup();
- exit(EXIT_SUCCESS);
- }
- }
-
- waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
- waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
-}
-
-/**
- * pasta_wait_for_ns() - Busy loop until we can enter the target namespace
- * @arg: Execution context
- *
- * Return: 0
- */
-static int pasta_wait_for_ns(void *arg)
-{
- struct ctx *c = (struct ctx *)arg;
- char ns[PATH_MAX];
-
- if (c->netns_only)
- goto netns;
-
- snprintf(ns, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid);
- do
- while ((c->pasta_userns_fd = open(ns, O_RDONLY)) < 0);
- while (setns(c->pasta_userns_fd, 0) && !close(c->pasta_userns_fd));
-
-netns:
- snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);
- do
- while ((c->pasta_netns_fd = open(ns, O_RDONLY)) < 0);
- while (setns(c->pasta_netns_fd, 0) && !close(c->pasta_netns_fd));
-
- return 0;
-}
-
-/**
- * pasta_start_ns() - Fork shell in new namespace if target ns is not given
- * @c: Execution context
- */
-static void pasta_start_ns(struct ctx *c)
-{
- char buf[BUFSIZ], *shell;
- int euid = geteuid();
- int fd;
-
- c->foreground = 1;
- if (!c->debug)
- c->quiet = 1;
-
- if ((pasta_child_pid = fork()) == -1) {
- perror("fork");
- exit(EXIT_FAILURE);
- }
-
- if (pasta_child_pid) {
- NS_CALL(pasta_wait_for_ns, c);
- return;
- }
-
- if (unshare(CLONE_NEWNET | (c->netns_only ? 0 : CLONE_NEWUSER))) {
- perror("unshare");
- exit(EXIT_FAILURE);
- }
-
- if (!c->netns_only) {
- snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1);
-
- fd = open("/proc/self/uid_map", O_WRONLY);
- write(fd, buf, strlen(buf));
- close(fd);
-
- fd = open("/proc/self/setgroups", O_WRONLY);
- write(fd, "deny", sizeof("deny"));
- close(fd);
-
- fd = open("/proc/self/gid_map", O_WRONLY);
- write(fd, buf, strlen(buf));
- close(fd);
- }
-
- fd = open("/proc/sys/net/ipv4/ping_group_range", O_WRONLY);
- write(fd, "0 0", strlen("0 0"));
- close(fd);
-
- shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh";
- if (strstr(shell, "/bash"))
- execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
- else
- execve(shell, ((char *[]) { shell, NULL }), environ);
-
- perror("execve");
- exit(EXIT_FAILURE);
-}
-
/**
* main() - Entry point and main loop
* @argc: Argument count
@@ -366,20 +195,12 @@ int main(int argc, char **argv)
openlog(log_name, 0, LOG_DAEMON);
setlogmask(LOG_MASK(LOG_EMERG));
+
conf(&c, argc, argv);
if (!c.debug && (c.stderr || isatty(fileno(stdout))))
openlog(log_name, LOG_PERROR, LOG_DAEMON);
- if (c.mode == MODE_PASTA && !c.pasta_netns_fd) {
- char proc_path[PATH_MAX];
-
- pasta_start_ns(&c);
- snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net",
- pasta_child_pid);
- readlink(proc_path, pasta_child_ns, PATH_MAX);
- }
-
c.epollfd = epoll_create1(0);
if (c.epollfd == -1) {
perror("epoll_create1");
diff --git a/passt.h b/passt.h
index 4cce092..50e33a3 100644
--- a/passt.h
+++ b/passt.h
@@ -113,8 +113,10 @@ enum passt_modes {
* @addr6_ll_seen: Latest IPv6 link-local address seen as source from tap
* @gw6: Default IPv6 gateway
* @dns4: IPv4 DNS addresses, zero-terminated
- * @ifn: Name of routable interface
+ * @ifi: Index of routable interface
* @pasta_ifn: Name of namespace interface for pasta
+ * @pasta_ifi: Index of namespace interface for pasta
+ * @pasta_conf_ns: Configure namespace interface after creating it
* @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler
* @no_tcp: Disable UDP operation
@@ -167,8 +169,10 @@ struct ctx {
struct in6_addr gw6;
struct in6_addr dns6[MAXNS + 1];
- char ifn[IF_NAMESIZE];
+ unsigned int ifi;
char pasta_ifn[IF_NAMESIZE];
+ unsigned int pasta_ifi;
+ int pasta_conf_ns;
int no_tcp;
struct tcp_ctx tcp;
diff --git a/tap.c b/tap.c
index 0abf5a0..ec2b8b5 100644
--- a/tap.c
+++ b/tap.c
@@ -25,10 +25,10 @@
#include <arpa/inet.h>
#include <stdint.h>
#include <sys/epoll.h>
+#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/ioctl.h>
#include <sys/uio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -50,6 +50,8 @@
#include "ndp.h"
#include "dhcpv6.h"
#include "pcap.h"
+#include "netlink.h"
+#include "pasta.h"
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static struct tap_msg seq4[TAP_MSGS];
@@ -844,102 +846,23 @@ static void tap_sock_init_unix(struct ctx *c)
static int tun_ns_fd = -1;
/**
- * tap_sock_init_tun_ns() - Create tuntap fd in namespace, bring up loopback
+ * tap_ns_tun() - Get tuntap fd in namespace
* @c: Execution context
- */
-static int tap_sock_init_tun_ns(void *c)
-{
- int fd;
-
- if (ns_enter((struct ctx *)c))
- goto fail;
-
- if ((fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
- goto fail;
-
- tun_ns_fd = fd;
-
- if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
- perror("socket for ioctl");
- goto fail;
- }
-
- if (ioctl(fd, SIOCSIFFLAGS, &((struct ifreq){ .ifr_name = "lo",
- .ifr_flags = IFF_UP }))) {
- perror("SIOCSIFFLAGS ioctl for \"lo\"");
- close(fd);
- goto fail;
- }
-
- close(fd);
-
- return 0;
-
-fail:
- tun_ns_fd = -1;
- return 0;
-}
-
-/**
- * struct tap_sock_if_up_ns_arg - Arguments for tap_sock_if_up_ns()
- * @c: Execution context
- * @ifname: Interface name of tap device
- */
-struct tap_sock_if_up_ns_arg {
- struct ctx *c;
- char ifname[IFNAMSIZ];
-};
-
-/**
- * tap_sock_if_up_ns() - Bring up tap, get or set MAC address (if we have one)
- * @ifname: Interface name
*
- * Return: 0 -- not fundamental, the interface can be brought up later
+ * Return: 0
*/
-static int tap_sock_if_up_ns(void *arg)
+static int tap_ns_tun(void *arg)
{
- struct ifreq ifr = { .ifr_flags = IFF_UP };
- struct tap_sock_if_up_ns_arg *a;
- int fd;
-
- a = (struct tap_sock_if_up_ns_arg *)arg;
-
- if (ns_enter(a->c))
- return 0;
-
- if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
- perror("socket for ioctl");
- return 0;
- }
-
- strncpy(ifr.ifr_name, a->ifname, IFNAMSIZ);
- if (ioctl(fd, SIOCSIFFLAGS, &ifr)) {
- perror("SIOCSIFFLAGS ioctl for tap");
- goto out;
- }
-
- if (memcmp(a->c->mac_guest,
- ((uint8_t [ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }),
- ETH_ALEN)) {
- ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
- memcpy(ifr.ifr_hwaddr.sa_data, a->c->mac_guest, ETH_ALEN);
-
- if (ioctl(fd, SIOCSIFHWADDR, &ifr) < 0) {
- perror("SIOCSIFHWADDR ioctl for tap");
- goto out;
- }
- } else {
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
- perror("SIOCGIFHWADDR ioctl for tap");
- goto out;
- }
+ struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI };
+ struct ctx *c = (struct ctx *)arg;
- memcpy(a->c->mac_guest, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
- proto_update_l2_buf(a->c->mac_guest, NULL, NULL);
- }
+ strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ);
-out:
- close(fd);
+ if (ns_enter(c) ||
+ (tun_ns_fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0 ||
+ ioctl(tun_ns_fd, TUNSETIFF, &ifr) ||
+ !(c->pasta_ifi = if_nametoindex(c->pasta_ifn)))
+ tun_ns_fd = -1;
return 0;
}
@@ -950,24 +873,13 @@ out:
*/
static void tap_sock_init_tun(struct ctx *c)
{
- struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI };
- struct tap_sock_if_up_ns_arg ifup_arg;
-
- NS_CALL(tap_sock_init_tun_ns, c);
+ NS_CALL(tap_ns_tun, c);
if (tun_ns_fd == -1) {
err("Failed to open tun socket in namespace");
exit(EXIT_FAILURE);
}
- strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ);
- if (ioctl(tun_ns_fd, TUNSETIFF, &ifr)) {
- perror("TUNSETIFF ioctl");
- exit(EXIT_FAILURE);
- }
-
- strncpy(ifup_arg.ifname, c->pasta_ifn, IFNAMSIZ);
- ifup_arg.c = c;
- NS_CALL(tap_sock_if_up_ns, (void *)&ifup_arg);
+ pasta_ns_conf(c);
pcap_init(c, c->pasta_netns_fd);
diff --git a/tcp.c b/tcp.c
index c862632..2abcaf8 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1852,7 +1852,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
struct sockaddr_in6 addr6_ll = {
.sin6_family = AF_INET6,
.sin6_addr = c->addr6_ll,
- .sin6_scope_id = if_nametoindex(c->ifn),
+ .sin6_scope_id = c->ifi,
};
bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll));
}
diff --git a/util.c b/util.c
index 2a5c5ee..8b4d669 100644
--- a/util.c
+++ b/util.c
@@ -172,7 +172,7 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
addr6.sin6_addr = c->addr6;
} else if (bind_addr == BIND_LL) {
addr6.sin6_addr = c->addr6_ll;
- addr6.sin6_scope_id = if_nametoindex(c->ifn);
+ addr6.sin6_scope_id = c->ifi;
} else {
addr6.sin6_addr = in6addr_any;
}
diff --git a/util.h b/util.h
index 03c5ebb..605b708 100644
--- a/util.h
+++ b/util.h
@@ -39,6 +39,12 @@ void debug(const char *format, ...);
#define V6 1
#define IP_VERSIONS 2
+enum {
+ IP_VERSION_DISABLED = 0,
+ IP_VERSION_ENABLED,
+ IP_VERSION_PROBE,
+};
+
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
@@ -48,6 +54,9 @@ void debug(const char *format, ...);
#define PORT_EPHEMERAL_MIN ((1 << 15) + (1 << 14)) /* RFC 6335 */
#define PORT_IS_EPHEMERAL(port) ((port) >= PORT_EPHEMERAL_MIN)
+#define MAC_ZERO ((uint8_t [ETH_ALEN]){ 0 })
+#define MAC_IS_ZERO(addr) (!memcmp((addr), MAC_ZERO, ETH_ALEN))
+
#define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 4)
#define NS_CALL(fn, arg) \
do { \