aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2021-10-11 12:01:31 +0200
committerStefano Brivio <sbrivio@redhat.com>2021-10-14 13:15:12 +0200
commit675174d4ba255383b213437e29b617d8f55dbc69 (patch)
tree7add0cf70e595028f9ca9485eb8e139d3d652bb6
parentdcd3605d14aba011fa6c2f4596cb7494f64d2b93 (diff)
downloadpasst-675174d4ba255383b213437e29b617d8f55dbc69.tar
passt-675174d4ba255383b213437e29b617d8f55dbc69.tar.gz
passt-675174d4ba255383b213437e29b617d8f55dbc69.tar.bz2
passt-675174d4ba255383b213437e29b617d8f55dbc69.tar.lz
passt-675174d4ba255383b213437e29b617d8f55dbc69.tar.xz
passt-675174d4ba255383b213437e29b617d8f55dbc69.tar.zst
passt-675174d4ba255383b213437e29b617d8f55dbc69.zip
conf, tap: Split netlink and pasta functions, allow interface configuration
Move netlink routines to their own file, and use netlink to configure or fetch all the information we need, except for the TUNSETIFF ioctl. Move pasta-specific functions to their own file as well, add parameters and calls to configure the tap interface in the namespace. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--Makefile9
-rw-r--r--conf.c448
-rw-r--r--netlink.c514
-rw-r--r--netlink.h6
-rw-r--r--passt.111
-rw-r--r--passt.c183
-rw-r--r--passt.h8
-rw-r--r--tap.c120
-rw-r--r--tcp.c2
-rw-r--r--util.c2
-rw-r--r--util.h9
11 files changed, 703 insertions, 609 deletions
diff --git a/Makefile b/Makefile
index 8a1b3c5..9f0e3bf 100644
--- a/Makefile
+++ b/Makefile
@@ -15,11 +15,12 @@ static: clean all
passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
- siphash.c siphash.h tap.c tap.h icmp.c icmp.h tcp.c tcp.h \
- udp.c udp.h util.c util.h
+ netlink.c netlink.h pasta.c pasta.h siphash.c siphash.h tap.c tap.h \
+ icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
$(CC) $(CFLAGS) \
- passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c \
- pcap.c ndp.c siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt
+ passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c pasta.c pcap.c \
+ ndp.c netlink.c siphash.c tap.c icmp.c tcp.c udp.c util.c \
+ -o passt
pasta: passt
ln -s passt pasta
diff --git a/conf.c b/conf.c
index e020417..6399fbb 100644
--- a/conf.c
+++ b/conf.c
@@ -17,7 +17,6 @@
#include <getopt.h>
#include <string.h>
#include <errno.h>
-#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -38,6 +37,8 @@
#include "passt.h"
#include "udp.h"
#include "tcp.h"
+#include "netlink.h"
+#include "pasta.h"
/**
* get_bound_ports() - Get maps of ports with bound sockets
@@ -267,301 +268,6 @@ overlap:
}
/**
- * nl_req() - Send netlink request and read response, doesn't return on failure
- * @buf: Buffer for response (BUFSIZ long)
- * @req: Request with netlink header
- * @len: Request length
- */
-static void nl_req(char *buf, void *req, ssize_t len)
-{
- int s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE), v = 1;
- struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
-
- if (s < 0 ||
- setsockopt(s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &v, sizeof(v)) ||
- bind(s, (struct sockaddr *)&addr, sizeof(addr)) ||
- (send(s, req, len, 0) < len) ||
- (recv(s, buf, BUFSIZ, 0) < 0)) {
- perror("netlink recv");
- exit(EXIT_FAILURE);
- }
-
- close(s);
-}
-
-/**
- * get_routes() - Get default route and fill in routable interface name
- * @c: Execution context
- */
-static void get_routes(struct ctx *c)
-{
- struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = {
- .nlh.nlmsg_type = RTM_GETROUTE,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
- .nlh.nlmsg_seq = 1,
- .rtm.rtm_family = AF_INET,
- .rtm.rtm_table = RT_TABLE_MAIN,
- .rtm.rtm_scope = RT_SCOPE_UNIVERSE,
- .rtm.rtm_type = RTN_UNICAST,
- };
- char ifn[IFNAMSIZ], buf[BUFSIZ];
- struct nlmsghdr *nh;
- struct rtattr *rta;
- struct rtmsg *rtm;
- int n, na, v4, v6;
-
- if (!c->v4 && !c->v6)
- v4 = v6 = -1;
- else
- v6 = -!(v4 = -c->v4);
-
-v6:
- nl_req(buf, &req, sizeof(req));
- nh = (struct nlmsghdr *)buf;
-
- for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
- rtm = (struct rtmsg *)NLMSG_DATA(nh);
-
- if (rtm->rtm_dst_len ||
- (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6))
- continue;
-
- /* Filter on interface only if already given */
- if (*c->ifn) {
- *ifn = 0;
- for (rta = (struct rtattr *)RTM_RTA(rtm),
- na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != RTA_OIF)
- continue;
-
- if_indextoname(*(unsigned *)RTA_DATA(rta), ifn);
- break;
- }
-
- if (strcmp(ifn, c->ifn))
- goto next;
- }
-
- for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (!*c->ifn && rta->rta_type == RTA_OIF)
- if_indextoname(*(unsigned *)RTA_DATA(rta), ifn);
-
- if (v4 && rta->rta_type == RTA_GATEWAY &&
- rtm->rtm_family == AF_INET) {
- if (!c->gw4) {
- memcpy(&c->gw4, RTA_DATA(rta),
- sizeof(c->gw4));
- }
- v4 = 1;
- }
-
- if (v6 && rta->rta_type == RTA_GATEWAY &&
- rtm->rtm_family == AF_INET6) {
- if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6)) {
- memcpy(&c->gw6, RTA_DATA(rta),
- sizeof(c->gw6));
- }
- v6 = 1;
- }
- }
-
-next:
- if (nh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (v6 < 0 && req.rtm.rtm_family == AF_INET) {
- req.rtm.rtm_family = AF_INET6;
- req.nlh.nlmsg_seq++;
- goto v6;
- } else if (v6 < 0) {
- v6 = 0;
- }
-
- if ((v4 <= 0 && v6 <= 0) || (!*c->ifn && !*ifn)) {
- err("No routing information");
- exit(EXIT_FAILURE);
- }
-
- if (!*c->ifn)
- strncpy(c->ifn, ifn, IFNAMSIZ);
- c->v4 = v4;
- c->v6 = v6;
-}
-
-/**
- * get_l3_addrs() - Fetch IP addresses of external routable interface
- * @c: Execution context
- */
-static void get_l3_addrs(struct ctx *c)
-{
- struct { struct nlmsghdr nlh; struct ifaddrmsg ifa; } req = {
- .nlh.nlmsg_type = RTM_GETADDR,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
- .nlh.nlmsg_seq = 1,
- .ifa.ifa_family = AF_INET,
- .ifa.ifa_index = if_nametoindex(c->ifn),
- };
- struct ifaddrmsg *ifa;
- struct nlmsghdr *nh;
- struct rtattr *rta;
- int n, na, v4, v6;
- char buf[BUFSIZ];
-
- if (c->v4) {
- v4 = -1;
- if ((c->addr4_seen = c->addr4))
- v4 = 1;
- }
-
- if (c->v6) {
- v6 = -2;
- if (!IN6_IS_ADDR_UNSPECIFIED(&c->addr6)) {
- memcpy(&c->addr6_seen, &c->addr6, sizeof(c->addr6));
- memcpy(&c->addr6_ll_seen, &c->addr6, sizeof(c->addr6));
- v6 = -1;
- }
- }
-
-next_v:
- if (v4 < 0)
- req.ifa.ifa_family = AF_INET;
- else if (v6 < 0)
- req.ifa.ifa_family = AF_INET6;
- else
- goto mask_only;
-
- nl_req(buf, &req, sizeof(req));
- nh = (struct nlmsghdr *)buf;
-
- for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
- if (nh->nlmsg_type != RTM_NEWADDR)
- goto next;
-
- ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
-
- for (rta = (struct rtattr *)IFA_RTA(ifa), na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFA_ADDRESS)
- continue;
-
- if (v4 < 0) {
- memcpy(&c->addr4, RTA_DATA(rta),
- sizeof(c->addr4));
- memcpy(&c->addr4_seen, RTA_DATA(rta),
- sizeof(c->addr4_seen));
- v4 = 1;
- } else if (v6 < 0) {
- if (v6 == -2 &&
- ifa->ifa_scope == RT_SCOPE_UNIVERSE) {
- memcpy(&c->addr6, RTA_DATA(rta),
- sizeof(c->addr6));
- memcpy(&c->addr6_seen, RTA_DATA(rta),
- sizeof(c->addr6_seen));
- memcpy(&c->addr6_ll_seen, RTA_DATA(rta),
- sizeof(c->addr6_ll_seen));
- } else if (ifa->ifa_scope == RT_SCOPE_LINK) {
- memcpy(&c->addr6_ll, RTA_DATA(rta),
- sizeof(c->addr6_ll));
- }
- if (!IN6_IS_ADDR_UNSPECIFIED(&c->addr6) &&
- !IN6_IS_ADDR_UNSPECIFIED(&c->addr6_ll))
- v6 = 1;
- }
- }
-next:
- if (nh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (v4 >= 0 && v6 < 0)
- goto next_v;
-
- if (v4 < c->v4 || v6 < c->v6)
- goto out;
-
-mask_only:
- if (v4 && !c->mask4) {
- if (IN_CLASSA(ntohl(c->addr4)))
- c->mask4 = htonl(IN_CLASSA_NET);
- else if (IN_CLASSB(ntohl(c->addr4)))
- c->mask4 = htonl(IN_CLASSB_NET);
- else if (IN_CLASSC(ntohl(c->addr4)))
- c->mask4 = htonl(IN_CLASSC_NET);
- else
- c->mask4 = 0xffffffff;
- }
-
- return;
-out:
- err("Couldn't get addresses for routable interface");
- exit(EXIT_FAILURE);
-}
-
-/**
- * get_l2_addr() - Fetch hardware addresses of external routable interface
- * @c: Execution context
- */
-static void get_l2_addr(struct ctx *c)
-{
- struct { struct nlmsghdr nlh; struct ifinfomsg ifi; } req = {
- .nlh.nlmsg_type = RTM_GETLINK,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP_FILTERED,
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- .nlh.nlmsg_seq = 1,
- .ifi.ifi_family = AF_UNSPEC,
- .ifi.ifi_index = if_nametoindex(c->ifn),
- };
- struct ifinfomsg *ifi;
- struct nlmsghdr *nh;
- struct rtattr *rta;
- char buf[BUFSIZ];
- int n, na;
-
- if (memcmp(c->mac, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN))
- goto mac_guest;
-
- nl_req(buf, &req, sizeof(req));
- nh = (struct nlmsghdr *)buf;
-
- for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
- if (nh->nlmsg_type != RTM_NEWLINK)
- goto next;
-
- ifi = (struct ifinfomsg *)NLMSG_DATA(nh);
-
- for (rta = (struct rtattr *)IFLA_RTA(ifi), na = RTM_PAYLOAD(nh);
- RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type != IFLA_ADDRESS)
- continue;
-
- memcpy(c->mac, RTA_DATA(rta), ETH_ALEN);
- break;
- }
-next:
- if (nh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (!memcmp(c->mac, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN))
- goto out;
-
-mac_guest:
- if (memcmp(c->mac_guest, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN))
- memset(&c->mac_guest, 0xff, sizeof(c->mac_guest));
-
- return;
-
-out:
- err("Couldn't get hardware address for routable interface");
- exit(EXIT_FAILURE);
-}
-
-/**
* get_dns() - Get nameserver addresses from local /etc/resolv.conf
* @c: Execution context
*/
@@ -731,6 +437,91 @@ static int conf_ns_opt(struct ctx *c,
}
/**
+ * conf_ip() - Verify or detect IPv4/IPv6 support, get relevant addresses
+ * @c: Execution context
+ */
+static void conf_ip(struct ctx *c)
+{
+	int v4, v6;
+
+	/* If IPv4 is set in the context, IPv6 is disabled and only IPv4 is
+	 * probed; otherwise, if IPv6 is set, IPv4 is disabled. With neither
+	 * set, both versions are enabled and probed.
+	 */
+	if (c->v4) {
+		c->v4 = IP_VERSION_ENABLED;
+		v4 = IP_VERSION_PROBE;
+		v6 = c->v6 = IP_VERSION_DISABLED;
+	} else if (c->v6) {
+		c->v6 = IP_VERSION_ENABLED;
+		v6 = IP_VERSION_PROBE;
+		v4 = c->v4 = IP_VERSION_DISABLED;
+	} else {
+		c->v4 = c->v6 = IP_VERSION_ENABLED;
+		v4 = v6 = IP_VERSION_PROBE;
+	}
+
+	/* No interface index configured: let netlink pick one */
+	if (!c->ifi)
+		c->ifi = nl_get_ext_if(&v4, &v6);
+
+	if (v4 != IP_VERSION_DISABLED) {
+		/* Only fetch what wasn't already configured */
+		if (!c->gw4)
+			nl_route(0, c->ifi, AF_INET, &c->gw4);
+
+		if (!c->addr4) {
+			nl_addr(0, c->ifi, AF_INET, &c->addr4, 0, NULL);
+			/* Classful netmask derived from the fetched address */
+			if (!c->mask4) {
+				if (IN_CLASSA(ntohl(c->addr4)))
+					c->mask4 = htonl(IN_CLASSA_NET);
+				else if (IN_CLASSB(ntohl(c->addr4)))
+					c->mask4 = htonl(IN_CLASSB_NET);
+				else if (IN_CLASSC(ntohl(c->addr4)))
+					c->mask4 = htonl(IN_CLASSC_NET);
+				else
+					c->mask4 = 0xffffffff;
+			}
+		}
+
+		memcpy(&c->addr4_seen, &c->addr4, sizeof(c->addr4_seen));
+
+		if (!memcmp(c->mac, MAC_ZERO, ETH_ALEN))
+			nl_link(0, c->ifi, c->mac, 0);
+	}
+
+	if (c->mode == MODE_PASST)
+		memset(&c->mac_guest, 0xff, sizeof(c->mac_guest));
+
+	if (v6 != IP_VERSION_DISABLED) {
+		if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6))
+			nl_route(0, c->ifi, AF_INET6, &c->gw6);
+
+		/* Global address is fetched only if not configured (NULL
+		 * otherwise), link-local address is always requested
+		 */
+		nl_addr(0, c->ifi, AF_INET6,
+			IN6_IS_ADDR_UNSPECIFIED(&c->addr6) ? &c->addr6 : NULL,
+			0, &c->addr6_ll);
+
+		/* Sizes must be those of the IPv6 destination fields: using
+		 * the size of addr4_seen here would truncate the addresses.
+		 *
+		 * NOTE(review): addr6_ll_seen is seeded from addr6, not from
+		 * addr6_ll -- confirm this is intended.
+		 */
+		memcpy(&c->addr6_seen, &c->addr6, sizeof(c->addr6_seen));
+		memcpy(&c->addr6_ll_seen, &c->addr6, sizeof(c->addr6_ll_seen));
+	}
+
+	/* A version is usable only if gateway, address, and (for IPv4) MAC
+	 * address or (for IPv6) link-local address are all known
+	 */
+	if (!c->gw4 || !c->addr4 || !memcmp(c->mac, MAC_ZERO, ETH_ALEN))
+		v4 = IP_VERSION_DISABLED;
+	else
+		v4 = IP_VERSION_ENABLED;
+
+	if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&c->addr6) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&c->addr6_ll))
+		v6 = IP_VERSION_DISABLED;
+	else
+		v6 = IP_VERSION_ENABLED;
+
+	if ((v4 == IP_VERSION_DISABLED) && (v6 == IP_VERSION_DISABLED)) {
+		err("External interface not usable");
+		exit(EXIT_FAILURE);
+	}
+
+	c->v4 = v4;
+	c->v6 = v6;
+}
+
+/**
* usage() - Print usage and exit
* @name: Executable name
*/
@@ -868,20 +659,22 @@ pasta_opts:
info( " implied if PATH or NAME are given without --userns");
info( " --nsrun-dir Directory for nsfs mountpoints");
info( " default: " NETNS_RUN_DIR);
+ info( " --config-net Configure tap interface in namespace");
+ info( " --ns-mac-addr ADDR Set MAC address on tap interface");
exit(EXIT_FAILURE);
}
void conf_print(struct ctx *c)
{
- char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN];
+ char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN], ifn[IFNAMSIZ];
int i;
if (c->mode == MODE_PASTA) {
info("Outbound interface: %s, namespace interface: %s",
- c->ifn, c->pasta_ifn);
+ if_indextoname(c->ifi, ifn), c->pasta_ifn);
} else {
- info("Outbound interface: %s", c->ifn);
+ info("Outbound interface: %s", if_indextoname(c->ifi, ifn));
}
if (c->v4) {
@@ -991,6 +784,8 @@ void conf(struct ctx *c, int argc, char **argv)
{"userns", required_argument, NULL, 2 },
{"netns-only", no_argument, &c->netns_only, 1 },
{"nsrun-dir", required_argument, NULL, 3 },
+ {"config-net", no_argument, &c->pasta_conf_ns, 1 },
+ {"ns-mac-addr", required_argument, NULL, 4 },
{ 0 },
};
struct get_bound_ports_ns_arg ns_ports_arg = { .c = c };
@@ -1051,6 +846,22 @@ void conf(struct ctx *c, int argc, char **argv)
usage(argv[0]);
}
break;
+ case 4:
+ if (c->mode != MODE_PASTA) {
+ err("--ns-mac-addr is for pasta mode only");
+ usage(argv[0]);
+ }
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ errno = 0;
+ b = strtol(optarg + i * 3, NULL, 16);
+ if (b < 0 || b > UCHAR_MAX || errno) {
+ err("Invalid MAC address: %s", optarg);
+ usage(argv[0]);
+ }
+ c->mac_guest[i] = b;
+ }
+ break;
case 'd':
if (c->debug) {
err("Multiple --debug options given");
@@ -1217,12 +1028,16 @@ void conf(struct ctx *c, int argc, char **argv)
usage(argv[0]);
break;
case 'i':
- if (*c->ifn) {
+ if (c->ifi) {
err("Redundant interface: %s", optarg);
usage(argv[0]);
}
- strncpy(c->ifn, optarg, IFNAMSIZ - 1);
+ if (!(c->ifi = if_nametoindex(optarg))) {
+ err("Invalid interface name %s: %s", optarg,
+ strerror(errno));
+ usage(argv[0]);
+ }
break;
case 'D':
if (c->no_dns ||
@@ -1320,25 +1135,26 @@ void conf(struct ctx *c, int argc, char **argv)
usage(argv[0]);
}
- if (c->v4 || c->v6) {
- if (!c->v4)
- c->no_dhcp = 1;
+ if (c->mode == MODE_PASTA && !c->pasta_netns_fd)
+ pasta_start_ns(c);
- if (!c->v6) {
- c->no_ndp = 1;
- c->no_dhcpv6 = 1;
- }
+ if (nl_sock_init(c)) {
+ err("Failed to get netlink socket");
+ exit(EXIT_FAILURE);
}
- if (!c->mtu) {
- c->mtu = (ETH_MAX_MTU - ETH_HLEN) /
- sizeof(uint32_t) * sizeof(uint32_t);
+ conf_ip(c);
+
+ if (!c->v4)
+ c->no_dhcp = 1;
+
+ if (!c->v6) {
+ c->no_ndp = 1;
+ c->no_dhcpv6 = 1;
}
- get_routes(c);
- get_l3_addrs(c);
- if (c->v4)
- get_l2_addr(c);
+ if (!c->mtu)
+ c->mtu = ROUND_DOWN(ETH_MAX_MTU - ETH_HLEN, sizeof(uint32_t));
if (c->mode == MODE_PASTA && dns4 == c->dns4 && dns6 == c->dns6)
c->no_dns = 1;
@@ -1347,7 +1163,7 @@ void conf(struct ctx *c, int argc, char **argv)
get_dns(c);
if (!*c->pasta_ifn)
- strncpy(c->pasta_ifn, c->ifn, IFNAMSIZ);
+ if_indextoname(c->ifi, c->pasta_ifn);
#ifdef PASST_LEGACY_NO_OPTIONS
if (c->mode == MODE_PASST) {
diff --git a/netlink.c b/netlink.c
new file mode 100644
index 0000000..ca2e77f
--- /dev/null
+++ b/netlink.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ * for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ * for network namespace/tap device mode
+ *
+ * netlink.c - rtnetlink routines: interfaces, addresses, routes
+ *
+ * Copyright (c) 2020-2021 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <string.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include "util.h"
+#include "passt.h"
+#include "netlink.h"
+
+/* Socket in init, in target namespace, sequence (just needs to be monotonic) */
+static int nl_sock = -1;
+static int nl_sock_ns = -1;
+static int nl_seq;
+
+/**
+ * __nl_sock_init() - Set up netlink sockets in init and target namespace
+ * @arg: Execution context, NULL to set up the first socket (nl_sock) only
+ *
+ * Failures are not returned: a socket that couldn't be created, bound or
+ * configured is left as -1 in nl_sock or nl_sock_ns, for the caller to check.
+ *
+ * Return: 0
+ */
+static int __nl_sock_init(void *arg)
+{
+ struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
+ struct ctx *c = (struct ctx *)arg;
+ int *s = &nl_sock, v = 1;
+
+ /* First pass sets up nl_sock, second pass (after ns_enter()) nl_sock_ns */
+ns:
+ /* NOTE(review): if bind() or setsockopt() fail, the descriptor returned by
+  * socket() is overwritten with -1 without being closed
+  */
+ if (((*s) = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0 ||
+ bind(*s, (struct sockaddr *)&addr, sizeof(addr)) ||
+ setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &v, sizeof(v)))
+ *s = -1;
+
+ /* Stop on failure, if no context was given, or once both passes are done */
+ if (*s == -1 || !c || s == &nl_sock_ns)
+ return 0;
+
+ /* ns_enter() is defined elsewhere: presumably it joins the target
+  * namespace, so that the second socket is created there -- TODO confirm
+  */
+ ns_enter((struct ctx *)arg);
+ s = &nl_sock_ns;
+ goto ns;
+}
+
+/**
+ * nl_sock_init() - Call __nl_sock_init() and check for failures
+ * @c: Execution context
+ *
+ * In pasta mode, both nl_sock and nl_sock_ns need to be valid on return from
+ * __nl_sock_init(), in any other mode only nl_sock is set up and checked.
+ *
+ * Return: -EIO if sockets couldn't be set up, 0 otherwise
+ */
+int nl_sock_init(struct ctx *c)
+{
+ if (c->mode == MODE_PASTA) {
+ /* NS_CALL() is defined elsewhere: presumably it runs the function in
+  * a separate context so that entering the namespace doesn't affect
+  * the caller -- TODO confirm
+  */
+ NS_CALL(__nl_sock_init, c);
+ if (nl_sock_ns == -1)
+ return -EIO;
+ } else {
+ /* NULL context: set up nl_sock only, no namespace socket */
+ __nl_sock_init(NULL);
+ }
+
+ if (nl_sock == -1)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * nl_req() - Send netlink request and read response
+ * @ns: Use netlink socket in namespace
+ * @buf: Buffer for response (at least BUFSIZ long)
+ * @req: Request with netlink header
+ * @len: Request length
+ *
+ * Messages already pending on the socket are discarded before the request is
+ * sent. A single recv() of at most BUFSIZ bytes is used for the response.
+ *
+ * Return: received length on success, negative error code on failure
+ */
+static int nl_req(int ns, char *buf, void *req, ssize_t len)
+{
+ int n, s = ns ? nl_sock_ns : nl_sock, done = 0;
+ char flush[BUFSIZ];
+
+ /* Drain the socket with non-blocking reads until nothing is left, or until
+  * a NLMSG_DONE or NLMSG_ERROR message is seen
+  */
+ while (!done && (n = recv(s, flush, sizeof(flush), MSG_DONTWAIT)) > 0) {
+ struct nlmsghdr *nh = (struct nlmsghdr *)flush;
+
+ for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+ if (nh->nlmsg_type == NLMSG_DONE ||
+ nh->nlmsg_type == NLMSG_ERROR) {
+ done = 1;
+ break;
+ }
+ }
+ }
+
+ /* A short send or a failed recv is reported as -errno; on success, len is
+  * reused to hold the received length
+  */
+ if ((send(s, req, len, 0) < len) || (len = recv(s, buf, BUFSIZ, 0)) < 0)
+ return -errno;
+
+ return len;
+}
+
+/**
+ * nl_get_ext_if() - Get interface index supporting IP versions being probed
+ * @v4:		Probe IPv4 support, set to ENABLED or DISABLED on return
+ * @v6:		Probe IPv6 support, set to ENABLED or DISABLED on return
+ *
+ * Routes with a zero destination prefix length are dumped for each version
+ * being probed. If only one version is probed, the first matching output
+ * interface is returned. If both are, an interface with such routes for both
+ * versions is preferred, then the first one found for IPv4, then for IPv6.
+ *
+ * Return: interface index, 0 if not found
+ */
+unsigned int nl_get_ext_if(int *v4, int *v6)
+{
+	struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = {
+		.nlh.nlmsg_type	 = RTM_GETROUTE,
+		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
+		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct rtmsg)),
+		.nlh.nlmsg_seq	 = nl_seq++,
+
+		.rtm.rtm_table	 = RT_TABLE_MAIN,
+		.rtm.rtm_scope	 = RT_SCOPE_UNIVERSE,
+		.rtm.rtm_type	 = RTN_UNICAST,
+	};
+	unsigned int i, first_v4 = 0, first_v6 = 0;
+	uint8_t has_v4[PAGE_SIZE * 8 / 8] = { 0 }; /* See __dev_alloc_name() */
+	uint8_t has_v6[PAGE_SIZE * 8 / 8] = { 0 }; /* in kernel */
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	struct rtmsg *rtm;
+	char buf[BUFSIZ];
+	long *word, tmp;
+	int n, na, *v;
+	uint8_t *vmap;
+
+	/* Start from IPv4 if probed; the v6 label is the entry point for the
+	 * second pass, once the IPv4 dump has been processed
+	 */
+	if (*v4 == IP_VERSION_PROBE) {
+		v = v4;
+		req.rtm.rtm_family = AF_INET;
+		vmap = has_v4;
+	} else if (*v6 == IP_VERSION_PROBE) {
+v6:
+		v = v6;
+		req.rtm.rtm_family = AF_INET6;
+		vmap = has_v6;
+	} else {
+		return 0;
+	}
+
+	n = nl_req(0, buf, &req, sizeof(req));
+	nh = (struct nlmsghdr *)buf;
+
+	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+		rtm = (struct rtmsg *)NLMSG_DATA(nh);
+
+		/* Only routes with zero prefix length, for the probed family */
+		if (rtm->rtm_dst_len || rtm->rtm_family != req.rtm.rtm_family)
+			continue;
+
+		for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
+		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			unsigned int ifi;
+
+			if (rta->rta_type != RTA_OIF)
+				continue;
+
+			ifi = *(unsigned int *)RTA_DATA(rta);
+
+			/* Single version probed: first match is the answer */
+			if (*v4 == IP_VERSION_DISABLED ||
+			    *v6 == IP_VERSION_DISABLED) {
+				*v = IP_VERSION_ENABLED;
+				return ifi;
+			}
+
+			if (v == v4 && !first_v4)
+				first_v4 = ifi;
+
+			if (v == v6 && !first_v6)
+				first_v6 = ifi;
+
+			/* Indices beyond the bitmap can't be tracked for the
+			 * dual-stack match below: skip them instead of
+			 * writing past the end of the array
+			 */
+			if (ifi < ARRAY_SIZE(has_v4) * 8)
+				bitmap_set(vmap, ifi);
+		}
+	}
+
+	if (v == v4 && *v6 == IP_VERSION_PROBE) {
+		req.nlh.nlmsg_seq = nl_seq++;
+		goto v6;
+	}
+
+	/* Scan IPv4 bitmap one word at a time, looking for an interface that
+	 * is also set in the IPv6 bitmap
+	 */
+	word = (long *)has_v4;
+	for (i = 0; i < ARRAY_SIZE(has_v4) / sizeof(long); i++, word++) {
+		int ifi;
+
+		tmp = *word;
+		while ((n = ffsl(tmp))) {
+			ifi = i * sizeof(long) * 8 + n - 1;
+			if (!first_v4)
+				first_v4 = ifi;
+
+			tmp &= ~(1UL << (n - 1));
+			if (bitmap_isset(has_v6, ifi)) {
+				*v4 = *v6 = IP_VERSION_ENABLED;
+				return ifi;
+			}
+		}
+	}
+
+	if (first_v4) {
+		*v4 = IP_VERSION_ENABLED;
+		*v6 = IP_VERSION_DISABLED;
+		return first_v4;
+	}
+
+	/* IPv6-only fallback: it's IPv6 that's enabled here, not IPv4 */
+	if (first_v6) {
+		*v4 = IP_VERSION_DISABLED;
+		*v6 = IP_VERSION_ENABLED;
+		return first_v6;
+	}
+
+	err("No external routable interface for any IP protocol");
+	return 0;
+}
+
+/**
+ * nl_route() - Get/set default gateway for given interface and address family
+ * @ns: Use netlink socket in namespace
+ * @ifi: Interface index
+ * @af: Address family
+ * @gw: Default gateway to fill if zero, to set if not
+ *
+ * With a zero @gw, routes are dumped and @gw is filled from the first
+ * RTA_GATEWAY attribute of a route with zero destination prefix length. With a
+ * non-zero @gw, a RTM_NEWROUTE request is sent: the response is not inspected,
+ * so a failure to set the route is not reported to the caller.
+ */
+void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw)
+{
+ /* Set if the gateway passed by the caller is non-zero, get otherwise */
+ int set = (af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(gw)) ||
+ (af == AF_INET && *(uint32_t *)gw);
+ /* Header, route message and output interface attribute are common, then
+  * destination and gateway attributes follow, sized by address family. The
+  * r4.end member only marks the end of the IPv4 variant for offsetof().
+  */
+ struct req_t {
+ struct nlmsghdr nlh;
+ struct rtmsg rtm;
+ struct rtattr rta;
+ unsigned int ifi;
+ union {
+ struct {
+ struct rtattr rta_dst;
+ struct in6_addr d;
+ struct rtattr rta_gw;
+ struct in6_addr a;
+ } r6;
+ struct {
+ struct rtattr rta_dst;
+ uint32_t d;
+ struct rtattr rta_gw;
+ uint32_t a;
+ uint8_t end;
+ } r4;
+ };
+ } req = {
+ .nlh.nlmsg_type = set ? RTM_NEWROUTE : RTM_GETROUTE,
+ .nlh.nlmsg_flags = NLM_F_REQUEST,
+ .nlh.nlmsg_seq = nl_seq++,
+
+ .rtm.rtm_family = af,
+ .rtm.rtm_table = RT_TABLE_MAIN,
+ .rtm.rtm_scope = RT_SCOPE_UNIVERSE,
+ .rtm.rtm_type = RTN_UNICAST,
+
+ .rta.rta_type = RTA_OIF,
+ .rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
+ .ifi = ifi,
+ };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rtm;
+ char buf[BUFSIZ];
+ int n, na;
+
+ if (set) {
+ /* Destination is left zeroed by the initialiser, only the gateway
+  * address is filled in
+  */
+ if (af == AF_INET6) {
+ req.nlh.nlmsg_len = sizeof(req);
+
+ req.r6.rta_dst.rta_type = RTA_DST;
+ req.r6.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r6.d));
+
+ memcpy(&req.r6.a, gw, sizeof(req.r6.a));
+ req.r6.rta_gw.rta_type = RTA_GATEWAY;
+ req.r6.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r6.a));
+ } else {
+ req.nlh.nlmsg_len = offsetof(struct req_t, r4.end);
+
+ req.r4.rta_dst.rta_type = RTA_DST;
+ req.r4.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r4.d));
+
+ req.r4.a = *(uint32_t *)gw;
+ req.r4.rta_gw.rta_type = RTA_GATEWAY;
+ req.r4.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r4.a));
+ }
+
+ req.rtm.rtm_protocol = RTPROT_BOOT;
+ req.nlh.nlmsg_flags |= NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ } else {
+ /* Dump request: message ends right before the per-family attributes */
+ req.nlh.nlmsg_len = offsetof(struct req_t, r6);
+ req.nlh.nlmsg_flags |= NLM_F_DUMP;
+ }
+
+ n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
+ /* Response to a set request is not checked */
+ if (set)
+ return;
+
+ nh = (struct nlmsghdr *)buf;
+ for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+ if (nh->nlmsg_type != RTM_NEWROUTE)
+ goto next;
+
+ rtm = (struct rtmsg *)NLMSG_DATA(nh);
+ /* Skip routes with a non-zero destination prefix length */
+ if (rtm->rtm_dst_len)
+ continue;
+
+ for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
+ RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != RTA_GATEWAY)
+ continue;
+
+ /* First gateway found is returned, length as given by attribute */
+ memcpy(gw, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ return;
+ }
+
+next:
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ }
+}
+
+/**
+ * nl_addr() - Get/set IP addresses
+ * @ns:		Use netlink socket in namespace
+ * @ifi:	Interface index
+ * @af:		Address family
+ * @addr:	Global address to fill if zero, to set if not, ignored if NULL
+ * @prefix_len:	Prefix length, passed as ifa_prefixlen in the request
+ * @addr_l:	Link-scoped address to fill, NULL if not requested
+ *
+ * With a non-zero @addr, a RTM_NEWADDR request is sent: the response is not
+ * inspected, so a failure to set the address is not reported to the caller.
+ * Otherwise, addresses are dumped, and @addr and @addr_l are filled, if still
+ * zero, from matching IFA_ADDRESS attributes.
+ */
+void nl_addr(int ns, unsigned int ifi, sa_family_t af,
+	     void *addr, int prefix_len, void *addr_l)
+{
+	int set = addr && ((af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(addr)) ||
+			   (af == AF_INET && *(uint32_t *)addr));
+	/* Local and peer address attributes follow the common part, sized by
+	 * address family. The a4.end member only marks the end of the IPv4
+	 * variant for offsetof().
+	 */
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifaddrmsg ifa;
+		union {
+			struct {
+				struct rtattr rta_l;
+				uint32_t l;
+				struct rtattr rta_a;
+				uint32_t a;
+
+				uint8_t end;
+			} a4;
+			struct {
+				struct rtattr rta_l;
+				struct in6_addr l;
+				struct rtattr rta_a;
+				struct in6_addr a;
+			} a6;
+		};
+	} req = {
+		.nlh.nlmsg_type    = set ? RTM_NEWADDR : RTM_GETADDR,
+		.nlh.nlmsg_flags   = NLM_F_REQUEST,
+		.nlh.nlmsg_len     = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+		.nlh.nlmsg_seq     = nl_seq++,
+
+		.ifa.ifa_family    = af,
+		.ifa.ifa_index     = ifi,
+		.ifa.ifa_prefixlen = prefix_len,
+	};
+	struct ifaddrmsg *ifa;
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	char buf[BUFSIZ];
+	int n, na;
+
+	if (set) {
+		/* Same address is used for both IFA_LOCAL and IFA_ADDRESS */
+		if (af == AF_INET6) {
+			req.nlh.nlmsg_len = sizeof(req);
+
+			memcpy(&req.a6.l, addr, sizeof(req.a6.l));
+			req.a6.rta_l.rta_len = RTA_LENGTH(sizeof(req.a6.l));
+			req.a6.rta_l.rta_type = IFA_LOCAL;
+			memcpy(&req.a6.a, addr, sizeof(req.a6.a));
+			req.a6.rta_a.rta_len = RTA_LENGTH(sizeof(req.a6.a));
+			req.a6.rta_a.rta_type = IFA_ADDRESS;
+		} else {
+			req.nlh.nlmsg_len = offsetof(struct req_t, a4.end);
+
+			req.a4.l = req.a4.a = *(uint32_t *)addr;
+			req.a4.rta_l.rta_len = RTA_LENGTH(sizeof(req.a4.l));
+			req.a4.rta_l.rta_type = IFA_LOCAL;
+			req.a4.rta_a.rta_len = RTA_LENGTH(sizeof(req.a4.a));
+			req.a4.rta_a.rta_type = IFA_ADDRESS;
+		}
+
+		req.ifa.ifa_scope = RT_SCOPE_UNIVERSE;
+		req.nlh.nlmsg_flags |= NLM_F_CREATE | NLM_F_ACK | NLM_F_EXCL;
+	} else {
+		req.nlh.nlmsg_flags |= NLM_F_DUMP;
+	}
+
+	n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
+	/* Response to a set request is not checked */
+	if (set)
+		return;
+
+	nh = (struct nlmsghdr *)buf;
+	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+		if (nh->nlmsg_type != RTM_NEWADDR)
+			goto next;
+
+		ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
+
+		for (rta = (struct rtattr *)IFA_RTA(ifa), na = RTM_PAYLOAD(nh);
+		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (rta->rta_type != IFA_ADDRESS)
+				continue;
+
+			/* Fill each address once: first match wins */
+			if (af == AF_INET && addr && !*(uint32_t *)addr)
+				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+			else if (af == AF_INET6 && addr &&
+				 ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
+				 IN6_IS_ADDR_UNSPECIFIED(addr))
+				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
+
+			if (addr_l &&
+			    af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK &&
+			    IN6_IS_ADDR_UNSPECIFIED(addr_l))
+				memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta));
+		}
+next:
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+	}
+}
+
+/**
+ * nl_link() - Get/set link attributes
+ * @ns: Use netlink socket in namespace
+ * @ifi: Interface index
+ * @mac: MAC address to fill, if passed as zero, to set otherwise
+ * @up: If set, bring up the link
+ *
+ * If @mac is non-zero or @up is set, a RTM_NEWLINK request is sent and the
+ * response is not inspected. @mac is filled only if it's passed as zero and
+ * @up is not set.
+ */
+void nl_link(int ns, unsigned int ifi, void *mac, int up)
+{
+ int change = !MAC_IS_ZERO(mac) || up;
+ struct {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ struct rtattr rta;
+ unsigned char mac[ETH_ALEN];
+ } req = {
+ .nlh.nlmsg_type = change ? RTM_NEWLINK : RTM_GETLINK,
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nlh.nlmsg_flags = NLM_F_REQUEST | (change ? NLM_F_ACK : 0),
+ .nlh.nlmsg_seq = nl_seq++,
+ .ifm.ifi_family = AF_UNSPEC,
+ .ifm.ifi_index = ifi,
+ .ifm.ifi_flags = up ? IFF_UP : 0,
+ .ifm.ifi_change = up ? IFF_UP : 0,
+
+ .rta.rta_type = IFLA_ADDRESS,
+ .rta.rta_len = RTA_LENGTH(ETH_ALEN),
+ };
+ struct ifinfomsg *ifm;
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ char buf[BUFSIZ];
+ int n, na;
+
+ /* By default the message length excludes the address attribute: extend
+  * it to the whole request only if there's a MAC address to set
+  */
+ if (!MAC_IS_ZERO(mac)) {
+ req.nlh.nlmsg_len = sizeof(req);
+ memcpy(req.mac, mac, ETH_ALEN);
+ }
+
+ n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
+
+ /* Response to a change request is not checked */
+ if (!MAC_IS_ZERO(mac) || up)
+ return;
+
+ nh = (struct nlmsghdr *)buf;
+ for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+ if (nh->nlmsg_type != RTM_NEWLINK)
+ goto next;
+
+ ifm = (struct ifinfomsg *)NLMSG_DATA(nh);
+
+ for (rta = (struct rtattr *)IFLA_RTA(ifm), na = RTM_PAYLOAD(nh);
+ RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type != IFLA_ADDRESS)
+ continue;
+
+ /* Copy hardware address from the first IFLA_ADDRESS attribute */
+ memcpy(mac, RTA_DATA(rta), ETH_ALEN);
+ break;
+ }
+next:
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ }
+}
diff --git a/netlink.h b/netlink.h
new file mode 100644
index 0000000..654e17e
--- /dev/null
+++ b/netlink.h
@@ -0,0 +1,6 @@
+int nl_sock_init(struct ctx *c);
+unsigned int nl_get_ext_if(int *v4, int *v6);
+void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw);
+void nl_addr(int ns, unsigned int ifi, sa_family_t af,
+ void *addr, int prefix_len, void *addr_l);
+void nl_link(int ns, unsigned int ifi, void *mac, int up);
diff --git a/passt.1 b/passt.1
index b21333b..df9892b 100644
--- a/passt.1
+++ b/passt.1
@@ -375,6 +375,17 @@ Directory for nsfs mountpoints, used as path prefix for names of namespaces.
The default path is shown with --help.
+.TP
+.BR \-\-config-net
+Configure networking in the namespace: set up addresses and routes as configured
+or sourced from the host, and bring up the tap interface.
+
+.TP
+.BR \-\-ns-mac-addr " " \fIaddr
+Configure MAC address \fIaddr\fR on the tap interface in the namespace.
+
+Default is to let the tap driver build a pseudorandom hardware address.
+
.SH EXAMPLES
.SS \fBpasta
diff --git a/passt.c b/passt.c
index e0519f6..b411657 100644
--- a/passt.c
+++ b/passt.c
@@ -62,6 +62,7 @@
#include "pcap.h"
#include "tap.h"
#include "conf.h"
+#include "pasta.h"
#define EPOLL_EVENTS 8
@@ -156,178 +157,6 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
udp_update_l2_buf(eth_d, eth_s, ip_da);
}
-static int pasta_child_pid;
-static char pasta_child_ns[PATH_MAX];
-
-/**
- * pasta_ns_cleanup() - Look for processes in namespace, terminate them
- */
-static void pasta_ns_cleanup(void)
-{
- char proc_path[PATH_MAX], ns_link[PATH_MAX];
- int recheck = 0, found = 0, waited = 0;
- struct dirent *dp;
- DIR *dirp;
-
- if (!*pasta_child_ns)
- return;
-
-loop:
- if (!(dirp = opendir("/proc")))
- return;
-
- while ((dp = readdir(dirp))) {
- pid_t pid;
-
- errno = 0;
- pid = strtol(dp->d_name, NULL, 0);
- if (!pid || errno)
- continue;
-
- snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid);
- if (readlink(proc_path, ns_link, PATH_MAX) < 0)
- continue;
-
- if (!strncmp(ns_link, pasta_child_ns, PATH_MAX)) {
- found = 1;
- if (waited)
- kill(pid, SIGKILL);
- else
- kill(pid, SIGQUIT);
- }
- }
-
- closedir(dirp);
-
- if (!found)
- return;
-
- if (waited) {
- if (recheck) {
- info("Some processes in namespace didn't quit");
- } else {
- found = 0;
- recheck = 1;
- goto loop;
- }
- return;
- }
-
- info("Waiting for all processes in namespace to terminate");
- sleep(1);
- waited = 1;
- goto loop;
-}
-
-/**
- * pasta_child_handler() - Exit once shell spawned by pasta_start_ns() exits
- * @signal: Unused, handler deals with SIGCHLD only
- */
-static void pasta_child_handler(int signal)
-{
- siginfo_t infop;
-
- (void)signal;
-
- if (pasta_child_pid &&
- !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
- if (infop.si_pid == pasta_child_pid) {
- pasta_ns_cleanup();
- exit(EXIT_SUCCESS);
- }
- }
-
- waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
- waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
-}
-
-/**
- * pasta_wait_for_ns() - Busy loop until we can enter the target namespace
- * @arg: Execution context
- *
- * Return: 0
- */
-static int pasta_wait_for_ns(void *arg)
-{
- struct ctx *c = (struct ctx *)arg;
- char ns[PATH_MAX];
-
- if (c->netns_only)
- goto netns;
-
- snprintf(ns, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid);
- do
- while ((c->pasta_userns_fd = open(ns, O_RDONLY)) < 0);
- while (setns(c->pasta_userns_fd, 0) && !close(c->pasta_userns_fd));
-
-netns:
- snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);
- do
- while ((c->pasta_netns_fd = open(ns, O_RDONLY)) < 0);
- while (setns(c->pasta_netns_fd, 0) && !close(c->pasta_netns_fd));
-
- return 0;
-}
-
-/**
- * pasta_start_ns() - Fork shell in new namespace if target ns is not given
- * @c: Execution context
- */
-static void pasta_start_ns(struct ctx *c)
-{
- char buf[BUFSIZ], *shell;
- int euid = geteuid();
- int fd;
-
- c->foreground = 1;
- if (!c->debug)
- c->quiet = 1;
-
- if ((pasta_child_pid = fork()) == -1) {
- perror("fork");
- exit(EXIT_FAILURE);
- }
-
- if (pasta_child_pid) {
- NS_CALL(pasta_wait_for_ns, c);
- return;
- }
-
- if (unshare(CLONE_NEWNET | (c->netns_only ? 0 : CLONE_NEWUSER))) {
- perror("unshare");
- exit(EXIT_FAILURE);
- }
-
- if (!c->netns_only) {
- snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1);
-
- fd = open("/proc/self/uid_map", O_WRONLY);
- write(fd, buf, strlen(buf));
- close(fd);
-
- fd = open("/proc/self/setgroups", O_WRONLY);
- write(fd, "deny", sizeof("deny"));
- close(fd);
-
- fd = open("/proc/self/gid_map", O_WRONLY);
- write(fd, buf, strlen(buf));
- close(fd);
- }
-
- fd = open("/proc/sys/net/ipv4/ping_group_range", O_WRONLY);
- write(fd, "0 0", strlen("0 0"));
- close(fd);
-
- shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh";
- if (strstr(shell, "/bash"))
- execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
- else
- execve(shell, ((char *[]) { shell, NULL }), environ);
-
- perror("execve");
- exit(EXIT_FAILURE);
-}
-
/**
* main() - Entry point and main loop
* @argc: Argument count
@@ -366,20 +195,12 @@ int main(int argc, char **argv)
openlog(log_name, 0, LOG_DAEMON);
setlogmask(LOG_MASK(LOG_EMERG));
+
conf(&c, argc, argv);
if (!c.debug && (c.stderr || isatty(fileno(stdout))))
openlog(log_name, LOG_PERROR, LOG_DAEMON);
- if (c.mode == MODE_PASTA && !c.pasta_netns_fd) {
- char proc_path[PATH_MAX];
-
- pasta_start_ns(&c);
- snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net",
- pasta_child_pid);
- readlink(proc_path, pasta_child_ns, PATH_MAX);
- }
-
c.epollfd = epoll_create1(0);
if (c.epollfd == -1) {
perror("epoll_create1");
diff --git a/passt.h b/passt.h
index 4cce092..50e33a3 100644
--- a/passt.h
+++ b/passt.h
@@ -113,8 +113,10 @@ enum passt_modes {
* @addr6_ll_seen: Latest IPv6 link-local address seen as source from tap
* @gw6: Default IPv6 gateway
* @dns4: IPv4 DNS addresses, zero-terminated
- * @ifn: Name of routable interface
+ * @ifi: Index of routable interface
* @pasta_ifn: Name of namespace interface for pasta
+ * @pasta_ifi: Index of namespace interface for pasta
+ * @pasta_conf_ns: Configure namespace interface after creating it
* @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler
* @no_tcp: Disable UDP operation
@@ -167,8 +169,10 @@ struct ctx {
struct in6_addr gw6;
struct in6_addr dns6[MAXNS + 1];
- char ifn[IF_NAMESIZE];
+ unsigned int ifi;
char pasta_ifn[IF_NAMESIZE];
+ unsigned int pasta_ifi;
+ int pasta_conf_ns;
int no_tcp;
struct tcp_ctx tcp;
diff --git a/tap.c b/tap.c
index 0abf5a0..ec2b8b5 100644
--- a/tap.c
+++ b/tap.c
@@ -25,10 +25,10 @@
#include <arpa/inet.h>
#include <stdint.h>
#include <sys/epoll.h>
+#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/ioctl.h>
#include <sys/uio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -50,6 +50,8 @@
#include "ndp.h"
#include "dhcpv6.h"
#include "pcap.h"
+#include "netlink.h"
+#include "pasta.h"
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static struct tap_msg seq4[TAP_MSGS];
@@ -844,102 +846,23 @@ static void tap_sock_init_unix(struct ctx *c)
static int tun_ns_fd = -1;
/**
- * tap_sock_init_tun_ns() - Create tuntap fd in namespace, bring up loopback
+ * tap_ns_tun() - Get tuntap fd in namespace
* @c: Execution context
- */
-static int tap_sock_init_tun_ns(void *c)
-{
- int fd;
-
- if (ns_enter((struct ctx *)c))
- goto fail;
-
- if ((fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
- goto fail;
-
- tun_ns_fd = fd;
-
- if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
- perror("socket for ioctl");
- goto fail;
- }
-
- if (ioctl(fd, SIOCSIFFLAGS, &((struct ifreq){ .ifr_name = "lo",
- .ifr_flags = IFF_UP }))) {
- perror("SIOCSIFFLAGS ioctl for \"lo\"");
- close(fd);
- goto fail;
- }
-
- close(fd);
-
- return 0;
-
-fail:
- tun_ns_fd = -1;
- return 0;
-}
-
-/**
- * struct tap_sock_if_up_ns_arg - Arguments for tap_sock_if_up_ns()
- * @c: Execution context
- * @ifname: Interface name of tap device
- */
-struct tap_sock_if_up_ns_arg {
- struct ctx *c;
- char ifname[IFNAMSIZ];
-};
-
-/**
- * tap_sock_if_up_ns() - Bring up tap, get or set MAC address (if we have one)
- * @ifname: Interface name
*
- * Return: 0 -- not fundamental, the interface can be brought up later
+ * Return: 0
*/
-static int tap_sock_if_up_ns(void *arg)
+static int tap_ns_tun(void *arg)
{
- struct ifreq ifr = { .ifr_flags = IFF_UP };
- struct tap_sock_if_up_ns_arg *a;
- int fd;
-
- a = (struct tap_sock_if_up_ns_arg *)arg;
-
- if (ns_enter(a->c))
- return 0;
-
- if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
- perror("socket for ioctl");
- return 0;
- }
-
- strncpy(ifr.ifr_name, a->ifname, IFNAMSIZ);
- if (ioctl(fd, SIOCSIFFLAGS, &ifr)) {
- perror("SIOCSIFFLAGS ioctl for tap");
- goto out;
- }
-
- if (memcmp(a->c->mac_guest,
- ((uint8_t [ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }),
- ETH_ALEN)) {
- ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
- memcpy(ifr.ifr_hwaddr.sa_data, a->c->mac_guest, ETH_ALEN);
-
- if (ioctl(fd, SIOCSIFHWADDR, &ifr) < 0) {
- perror("SIOCSIFHWADDR ioctl for tap");
- goto out;
- }
- } else {
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
- perror("SIOCGIFHWADDR ioctl for tap");
- goto out;
- }
+ struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI };
+ struct ctx *c = (struct ctx *)arg;
- memcpy(a->c->mac_guest, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
- proto_update_l2_buf(a->c->mac_guest, NULL, NULL);
- }
+ strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ);
-out:
- close(fd);
+ if (ns_enter(c) ||
+ (tun_ns_fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0 ||
+ ioctl(tun_ns_fd, TUNSETIFF, &ifr) ||
+ !(c->pasta_ifi = if_nametoindex(c->pasta_ifn)))
+ tun_ns_fd = -1;
return 0;
}
@@ -950,24 +873,13 @@ out:
*/
static void tap_sock_init_tun(struct ctx *c)
{
- struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI };
- struct tap_sock_if_up_ns_arg ifup_arg;
-
- NS_CALL(tap_sock_init_tun_ns, c);
+ NS_CALL(tap_ns_tun, c);
if (tun_ns_fd == -1) {
err("Failed to open tun socket in namespace");
exit(EXIT_FAILURE);
}
- strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ);
- if (ioctl(tun_ns_fd, TUNSETIFF, &ifr)) {
- perror("TUNSETIFF ioctl");
- exit(EXIT_FAILURE);
- }
-
- strncpy(ifup_arg.ifname, c->pasta_ifn, IFNAMSIZ);
- ifup_arg.c = c;
- NS_CALL(tap_sock_if_up_ns, (void *)&ifup_arg);
+ pasta_ns_conf(c);
pcap_init(c, c->pasta_netns_fd);
diff --git a/tcp.c b/tcp.c
index c862632..2abcaf8 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1852,7 +1852,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
struct sockaddr_in6 addr6_ll = {
.sin6_family = AF_INET6,
.sin6_addr = c->addr6_ll,
- .sin6_scope_id = if_nametoindex(c->ifn),
+ .sin6_scope_id = c->ifi,
};
bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll));
}
diff --git a/util.c b/util.c
index 2a5c5ee..8b4d669 100644
--- a/util.c
+++ b/util.c
@@ -172,7 +172,7 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
addr6.sin6_addr = c->addr6;
} else if (bind_addr == BIND_LL) {
addr6.sin6_addr = c->addr6_ll;
- addr6.sin6_scope_id = if_nametoindex(c->ifn);
+ addr6.sin6_scope_id = c->ifi;
} else {
addr6.sin6_addr = in6addr_any;
}
diff --git a/util.h b/util.h
index 03c5ebb..605b708 100644
--- a/util.h
+++ b/util.h
@@ -39,6 +39,12 @@ void debug(const char *format, ...);
#define V6 1
#define IP_VERSIONS 2
+enum {
+ IP_VERSION_DISABLED = 0,
+ IP_VERSION_ENABLED,
+ IP_VERSION_PROBE,
+};
+
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
@@ -48,6 +54,9 @@ void debug(const char *format, ...);
#define PORT_EPHEMERAL_MIN ((1 << 15) + (1 << 14)) /* RFC 6335 */
#define PORT_IS_EPHEMERAL(port) ((port) >= PORT_EPHEMERAL_MIN)
+#define MAC_ZERO ((uint8_t [ETH_ALEN]){ 0 })
+#define MAC_IS_ZERO(addr) (!memcmp((addr), MAC_ZERO, ETH_ALEN))
+
#define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 4)
#define NS_CALL(fn, arg) \
do { \