about | git | code | bugs | lists | chat
diff options
context:
space:
mode:
author: Stefano Brivio <sbrivio@redhat.com> 2021-08-12 15:42:43 +0200
committer: Stefano Brivio <sbrivio@redhat.com> 2021-09-01 17:00:27 +0200
commit: 1e49d194d01788afbc4b8216e27c794651a4facf (patch)
tree: 3397d4b687a74fe9552e057138c3a795917a5afa
parent: 1b1b27c06a27067a7d7a380f1df545e72268c411 (diff)
downloadpasst-1e49d194d01788afbc4b8216e27c794651a4facf.tar
passt-1e49d194d01788afbc4b8216e27c794651a4facf.tar.gz
passt-1e49d194d01788afbc4b8216e27c794651a4facf.tar.bz2
passt-1e49d194d01788afbc4b8216e27c794651a4facf.tar.lz
passt-1e49d194d01788afbc4b8216e27c794651a4facf.tar.xz
passt-1e49d194d01788afbc4b8216e27c794651a4facf.tar.zst
passt-1e49d194d01788afbc4b8216e27c794651a4facf.zip
passt, pasta: Introduce command-line options and port re-mapping
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- Makefile 17
-rw-r--r-- conf.c 1163
-rw-r--r-- conf.h 1
-rw-r--r-- dhcp.c 9
-rw-r--r-- dhcpv6.c 3
-rw-r--r-- ndp.c 14
-rw-r--r-- passt.c 524
-rw-r--r-- passt.h 40
-rw-r--r-- pcap.c 60
-rw-r--r-- qrap.c 5
-rw-r--r-- tap.c 92
-rw-r--r-- tcp.c 137
-rw-r--r-- tcp.h 18
-rw-r--r-- udp.c 174
-rw-r--r-- udp.h 17
-rw-r--r-- util.c 47
-rw-r--r-- util.h 14
17 files changed, 1644 insertions, 691 deletions
diff --git a/Makefile b/Makefile
index 5a6692e..b539310 100644
--- a/Makefile
+++ b/Makefile
@@ -6,20 +6,15 @@ all: passt pasta passt4netns qrap
avx2: CFLAGS += -Ofast -mavx2 -ftree-vectorize -funroll-loops
avx2: clean all
-avx2_debug: CFLAGS += -Ofast -mavx2 -ftree-vectorize -funroll-loops -DDEBUG -g
-avx2_debug: clean all
-
static: CFLAGS += -static
static: clean all
-debug: CFLAGS += -static -DDEBUG -g
-debug: clean all
-
-passt: passt.c passt.h arp.c arp.h checksum.c checksum.h dhcp.c dhcp.h \
- dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h siphash.c siphash.h \
- tap.c tap.h icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
- $(CC) $(CFLAGS) passt.c arp.c checksum.c dhcp.c dhcpv6.c pcap.c ndp.c \
- siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt
+passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
+ dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
+ siphash.c siphash.h tap.c tap.h icmp.c icmp.h tcp.c tcp.h \
+ udp.c udp.h util.c util.h
+ $(CC) $(CFLAGS) passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c \
+ pcap.c ndp.c siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt
pasta: passt
ln -s passt pasta
diff --git a/conf.c b/conf.c
new file mode 100644
index 0000000..79353a1
--- /dev/null
+++ b/conf.c
@@ -0,0 +1,1163 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ * for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ * for network namespace/tap device mode
+ *
+ * conf.c - Configuration settings and option parsing
+ *
+ * Copyright (c) 2020-2021 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ifaddrs.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <time.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include "util.h"
+#include "passt.h"
+#include "udp.h"
+#include "tcp.h"
+
+/**
+ * struct get_bound_ports_ns_arg - Arguments for get_bound_ports_ns()
+ * @c: Execution context
+ * @proto: Protocol number (IPPROTO_TCP or IPPROTO_UDP)
+ *
+ * get_bound_ports_ns() takes a single untyped pointer: this structure bundles
+ * the two values it needs so that they can be passed through that pointer.
+ */
+struct get_bound_ports_ns_arg {
+ struct ctx *c;
+ uint8_t proto;
+};
+
+/**
+ * get_bound_ports_ns() - Scan sockets bound in target namespace into port maps
+ * @arg:	Pointer to a struct get_bound_ports_ns_arg
+ *
+ * Nothing is scanned if no target PID is configured, or if ns_enter() returns
+ * a non-zero value. For UDP, ports found in the "tcp" and "tcp6" tables are
+ * recorded in the UDP map as well.
+ *
+ * Return: 0, unconditionally
+ */
+static int get_bound_ports_ns(void *arg)
+{
+	struct get_bound_ports_ns_arg *args = arg;
+	struct ctx *c = args->c;
+	uint8_t *map;
+
+	if (!c->pasta_pid)
+		return 0;
+
+	if (ns_enter(c->pasta_pid))
+		return 0;
+
+	switch (args->proto) {
+	case IPPROTO_UDP:
+		map = c->udp.port_to_tap;
+
+		procfs_scan_listen("udp", map);
+		procfs_scan_listen("udp6", map);
+
+		procfs_scan_listen("tcp", map);
+		procfs_scan_listen("tcp6", map);
+		break;
+	case IPPROTO_TCP:
+		map = c->tcp.port_to_tap;
+
+		procfs_scan_listen("tcp", map);
+		procfs_scan_listen("tcp6", map);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * get_bound_ports() - Scan locally bound sockets into "to init" port maps
+ * @c:		Execution context
+ * @proto:	Protocol number (IPPROTO_TCP or IPPROTO_UDP)
+ *
+ * Unlike get_bound_ports_ns(), no namespace is entered before scanning. For
+ * UDP, ports found in the "tcp" and "tcp6" tables are recorded in the UDP map
+ * as well. Any other protocol number is ignored.
+ */
+static void get_bound_ports(struct ctx *c, uint8_t proto)
+{
+	uint8_t *map;
+
+	switch (proto) {
+	case IPPROTO_UDP:
+		map = c->udp.port_to_init;
+
+		procfs_scan_listen("udp", map);
+		procfs_scan_listen("udp6", map);
+
+		procfs_scan_listen("tcp", map);
+		procfs_scan_listen("tcp6", map);
+		break;
+	case IPPROTO_TCP:
+		map = c->tcp.port_to_init;
+
+		procfs_scan_listen("tcp", map);
+		procfs_scan_listen("tcp6", map);
+		break;
+	default:
+		break;
+	}
+}
+
+enum conf_port_type { /* kind of port option seen; conf() starts from 0 (none) */
+ PORT_SPEC = 1, /* explicit ports or ranges were given */
+ PORT_NONE, /* "none" was given */
+ PORT_AUTO, /* "auto" was given (accepted in pasta mode only) */
+ PORT_ALL, /* "all" was given (accepted in passt mode only) */
+};
+
+/**
+ * conf_ports() - Parse one port forwarding option, fill map and remap ports
+ * @c:		Execution context
+ * @optname:	Option character selecting the map: 't', 'T', 'u' or 'U'
+ * @optarg:	"none", "auto", "all", or a list of ports and port ranges
+ * @set:	Kind of specifier already seen for this option, updated here
+ *
+ * "none", "auto" and "all" can't be combined with anything else. Explicit
+ * specifiers can be given multiple times, as long as ports don't overlap.
+ *
+ * Return: 0 on success or unknown @optname, -EINVAL on invalid, incomplete,
+ *	   conflicting or overlapping specifier
+ */
+static int conf_ports(struct ctx *c, char optname, const char *optarg,
+		      enum conf_port_type *set)
+{
+	int start_src = -1, end_src = -1, start_dst = -1, end_dst = -1;
+	void (*remap)(in_port_t port, in_port_t delta);
+	const char *p;
+	uint8_t *map;
+	char *sep;
+
+	if (optname == 't') {
+		map = c->tcp.port_to_tap;
+		remap = tcp_remap_to_tap;
+	} else if (optname == 'T') {
+		map = c->tcp.port_to_init;
+		remap = tcp_remap_to_init;
+	} else if (optname == 'u') {
+		map = c->udp.port_to_tap;
+		remap = udp_remap_to_tap;
+	} else if (optname == 'U') {
+		map = c->udp.port_to_init;
+		remap = udp_remap_to_init;
+	} else {	/* For gcc -O3 */
+		return 0;
+	}
+
+	if (!strcmp(optarg, "none")) {
+		if (*set)
+			return -EINVAL;
+		*set = PORT_NONE;
+		return 0;
+	}
+
+	if (!strcmp(optarg, "auto")) {
+		if (*set || c->mode != MODE_PASTA)
+			return -EINVAL;
+		*set = PORT_AUTO;
+		return 0;
+	}
+
+	if (!strcmp(optarg, "all")) {
+		if (*set || c->mode != MODE_PASST)
+			return -EINVAL;
+		*set = PORT_ALL;
+		memset(map, 0xff, PORT_EPHEMERAL_MIN / 8);
+		return 0;
+	}
+
+	if (*set > PORT_SPEC)
+		return -EINVAL;
+
+	*set = PORT_SPEC;
+
+	if (strspn(optarg, "0123456789-,:") != strlen(optarg)) {
+		err("Invalid port specifier %s", optarg);
+		return -EINVAL;
+	}
+
+	p = optarg;
+	do {
+		long port;
+		int i;
+
+		/* strtol() only sets errno on failure: clear stale values */
+		errno = 0;
+		port = strtol(p, &sep, 10);
+		if (sep == p) {
+			if (*p)		/* stray separator: ",22", "22,,23" */
+				goto bad;
+			break;		/* end of string, or trailing comma */
+		}
+
+		if (port < 0 || port > USHRT_MAX || errno)
+			goto bad;
+
+		/* -t 22
+		 *    ^ start_src	end_src == start_dst == end_dst == -1
+		 *
+		 * -t 22-25
+		 *    |  ^ end_src
+		 *     ` start_src	start_dst == end_dst == -1
+		 *
+		 * -t 80:8080
+		 *    |  ^ start_dst
+		 *     ` start_src	end_src == end_dst == -1
+		 *
+		 * -t 22-80:8022-8080
+		 *    |  |  |    ^ end_dst
+		 *    |  |   ` start_dst
+		 *    |   ` end_src
+		 *     ` start_src
+		 */
+		switch (*sep) {
+		case '-':
+			if (start_src == -1) {		/* 22-... */
+				start_src = port;
+			} else {
+				/* A second '-' is only valid in the target
+				 * range, that is, once the source range is
+				 * complete and no target start is set yet.
+				 */
+				if (end_src == -1)	/* 22-80-... */
+					goto bad;
+				if (start_dst != -1)	/* 22-80:8022-8080-... */
+					goto bad;
+				start_dst = port;	/* 22-80:8022-... */
+			}
+			break;
+		case ':':
+			if (start_src == -1)		/* 80:... */
+				start_src = end_src = port;
+			else if (end_src == -1)		/* 22-80:... */
+				end_src = port;
+			else				/* 22-80:8022:... */
+				goto bad;
+			break;
+		case ',':
+		case 0:
+			if (start_src == -1)		/* 80 */
+				start_src = end_src = port;
+			else if (end_src == -1)		/* 22-25 */
+				end_src = port;
+			else if (start_dst == -1)	/* 80:8080 */
+				start_dst = end_dst = port;
+			else if (end_dst == -1)		/* 22-80:8022-8080 */
+				end_dst = port;
+			else
+				goto bad;
+
+			if (start_src > end_src)	/* 80-22 */
+				goto bad;
+
+			if (start_dst > end_dst)	/* 22-80:8080-8022 */
+				goto bad;
+
+			if (end_dst != -1 &&
+			    end_dst - start_dst != end_src - start_src)
+				goto bad;		/* 22-81:8022-8080 */
+
+			for (i = start_src; i <= end_src; i++) {
+				if (bitmap_isset(map, i))
+					goto overlap;
+
+				bitmap_set(map, i);
+
+				if (start_dst == -1)	/* 22 or 22-80 */
+					continue;
+
+				/* 80:8080 or 22-80:8022-8080 */
+				remap(i, (in_port_t)(start_dst - start_src));
+			}
+
+			start_src = end_src = start_dst = end_dst = -1;
+			break;
+		}
+		p = sep + 1;
+	} while (*sep);
+
+	/* Anything left here was never applied: "22-", "22:", "22-80:8022-" */
+	if (start_src != -1)
+		goto bad;
+
+	return 0;
+bad:
+	err("Invalid port specifier %s", optarg);
+	return -EINVAL;
+
+overlap:
+	err("Overlapping port specifier %s", optarg);
+	return -EINVAL;
+}
+
+/**
+ * struct nl_request - Netlink request filled and sent by get_routes()
+ * @nlh: Netlink message header
+ * @rtm: Routing Netlink message
+ *
+ * get_routes() sends this structure as is, with nlmsg_len set to its size, so
+ * @rtm needs to directly follow @nlh.
+ */
+struct nl_request {
+ struct nlmsghdr nlh;
+ struct rtmsg rtm;
+};
+
+/**
+ * get_routes() - Get default route and fill in routable interface name
+ * @c:		Execution context
+ *
+ * Dump the main routing table over netlink, for IPv4 first and then, unless
+ * operation is restricted to IPv4, for IPv6. Gateways are taken from default
+ * routes (zero destination prefix length) and only stored if not configured
+ * already. If c->ifn is set, routes via other interfaces are skipped,
+ * otherwise c->ifn is filled from the routes found.
+ *
+ * On return, c->v4 and c->v6 are set to the local v4 and v6 states. Exits the
+ * process if no gateway at all was found or no interface name is available.
+ *
+ * NOTE(review): v4 is left negative if IPv4 was allowed but no IPv4 gateway
+ * was found, so c->v4 can end up negative (still "true" for callers). This is
+ * preserved as it is: confirm whether it's intended.
+ */
+static void get_routes(struct ctx *c)
+{
+	struct nl_request req = {
+		.nlh.nlmsg_type = RTM_GETROUTE,
+		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
+		.nlh.nlmsg_len = sizeof(struct nl_request),
+		.nlh.nlmsg_seq = 1,
+		.rtm.rtm_family = AF_INET,
+		.rtm.rtm_table = RT_TABLE_MAIN,
+		.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
+		.rtm.rtm_type = RTN_UNICAST,
+	};
+	struct sockaddr_nl addr = {
+		.nl_family = AF_NETLINK,
+	};
+	/* Zeroed: it's checked below even if no RTA_OIF attribute was seen */
+	char ifn[IFNAMSIZ] = { 0 };
+	struct nlmsghdr *nlh;
+	int s, n, na, v4, v6;
+	struct rtattr *rta;
+	struct rtmsg *rtm;
+	char buf[BUFSIZ];
+
+	/* Negative: version allowed, not found yet. Zero: version disabled */
+	if (!c->v4 && !c->v6) {
+		v4 = v6 = -1;
+	} else {
+		v4 = -c->v4;
+		v6 = -!v4;
+	}
+
+	s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (s < 0) {
+		perror("netlink socket");
+		goto out;
+	}
+
+	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		perror("netlink bind");
+		goto out;
+	}
+
+v6:
+	if (send(s, &req, sizeof(req), 0) < 0) {
+		perror("netlink send");
+		goto out;
+	}
+
+	n = recv(s, &buf, sizeof(buf), 0);
+	if (n < 0) {
+		perror("netlink recv");
+		goto out;
+	}
+
+	nlh = (struct nlmsghdr *)buf;
+	for ( ; NLMSG_OK(nlh, n); nlh = NLMSG_NEXT(nlh, n)) {
+		rtm = (struct rtmsg *)NLMSG_DATA(nlh);
+
+		/* Default routes only */
+		if (rtm->rtm_dst_len ||
+		    (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6))
+			continue;
+
+		/* Filter on interface only if already given */
+		if (*c->ifn) {
+			*ifn = 0;
+			for (rta = (struct rtattr *)RTM_RTA(rtm),
+			     na = RTM_PAYLOAD(nlh);
+			     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+				if (rta->rta_type != RTA_OIF)
+					continue;
+
+				if_indextoname(*(unsigned *)RTA_DATA(rta), ifn);
+				break;
+			}
+
+			if (strcmp(ifn, c->ifn))
+				goto next;
+		}
+
+		for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nlh);
+		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (!*c->ifn && rta->rta_type == RTA_OIF)
+				if_indextoname(*(unsigned *)RTA_DATA(rta), ifn);
+
+			if (v4 && rta->rta_type == RTA_GATEWAY &&
+			    rtm->rtm_family == AF_INET) {
+				if (!c->gw4) {
+					memcpy(&c->gw4, RTA_DATA(rta),
+					       sizeof(c->gw4));
+				}
+				v4 = 1;
+			}
+
+			if (v6 && rta->rta_type == RTA_GATEWAY &&
+			    rtm->rtm_family == AF_INET6) {
+				if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6)) {
+					memcpy(&c->gw6, RTA_DATA(rta),
+					       sizeof(c->gw6));
+				}
+				v6 = 1;
+			}
+		}
+
+next:
+		if (nlh->nlmsg_type == NLMSG_DONE)
+			break;
+	}
+
+	if (v6 == -1) {
+		/* Second pass, for IPv6: -2 marks the pass as started */
+		req.rtm.rtm_family = AF_INET6;
+		req.nlh.nlmsg_seq++;
+		recv(s, &buf, sizeof(buf), 0);
+		v6--;
+		goto v6;
+	} else if (v6 < 0) {
+		v6 = 0;
+	}
+
+out:
+	if (s >= 0)
+		close(s);
+
+	if ((v4 <= 0 && v6 <= 0) || (!*c->ifn && !*ifn)) {
+		err("No routing information");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!*c->ifn)
+		strncpy(c->ifn, ifn, IFNAMSIZ);
+	c->v4 = v4;
+	c->v6 = v6;
+}
+
+/**
+ * get_addrs() - Fetch MAC, IP addresses, masks of external routable interface
+ * @c:		Execution context
+ *
+ * Addresses already set in @c are kept and copied to the corresponding "seen"
+ * fields. Anything missing is filled in from the addresses of interface
+ * c->ifn. If no IPv4 netmask is available, one is derived from the address
+ * class. The MAC address is read from the interface if it's all zeroes, and
+ * the guest MAC address is set to broadcast.
+ *
+ * Exits the process if addresses can't be listed, if an address is missing
+ * for an enabled IP version, or if the MAC address can't be read.
+ */
+static void get_addrs(struct ctx *c)
+{
+	struct ifreq ifr = {
+		.ifr_addr.sa_family = AF_INET,
+	};
+	struct ifaddrs *ifaddr, *ifa;
+	int s, v4 = 0, v6 = 0;
+
+	if (getifaddrs(&ifaddr) == -1) {
+		perror("getifaddrs");
+		goto out;
+	}
+
+	if (c->addr4) {
+		c->addr4_seen = c->addr4;
+		v4 = 1;
+	}
+
+	if (!IN6_IS_ADDR_UNSPECIFIED(&c->addr6)) {
+		memcpy(&c->addr6_seen, &c->addr6, sizeof(c->addr6));
+		memcpy(&c->addr6_ll_seen, &c->addr6, sizeof(c->addr6));
+		v6 = 1;
+	}
+
+	/* Fill in any missing information */
+	for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
+		struct sockaddr_in *in_addr;
+		struct sockaddr_in *in_mask;
+		struct sockaddr_in6 *in6_addr;
+
+		if (strcmp(ifa->ifa_name, c->ifn))
+			continue;
+
+		if (!ifa->ifa_addr)
+			continue;
+
+		in_addr = (struct sockaddr_in *)ifa->ifa_addr;
+		if (ifa->ifa_addr->sa_family == AF_INET && !c->addr4) {
+			c->addr4_seen = c->addr4 = in_addr->sin_addr.s_addr;
+			v4 = 1;
+		}
+
+		/* ifa_netmask may be a null pointer: the class-based fallback
+		 * below applies if no netmask is found here.
+		 */
+		if (ifa->ifa_addr->sa_family == AF_INET && !c->mask4 &&
+		    ifa->ifa_netmask &&
+		    in_addr->sin_addr.s_addr == c->addr4) {
+			in_mask = (struct sockaddr_in *)ifa->ifa_netmask;
+			c->mask4 = in_mask->sin_addr.s_addr;
+		}
+
+		if (ifa->ifa_addr->sa_family == AF_INET6) {
+			in6_addr = (struct sockaddr_in6 *)ifa->ifa_addr;
+			if (IN6_IS_ADDR_LINKLOCAL(&in6_addr->sin6_addr) &&
+			    IN6_IS_ADDR_UNSPECIFIED(&c->addr6_ll)) {
+				memcpy(&c->addr6_ll, &in6_addr->sin6_addr,
+				       sizeof(c->addr6_ll));
+			} else if (IN6_IS_ADDR_UNSPECIFIED(&c->addr6)) {
+				memcpy(&c->addr6, &in6_addr->sin6_addr,
+				       sizeof(c->addr6));
+				memcpy(&c->addr6_seen, &in6_addr->sin6_addr,
+				       sizeof(c->addr6_seen));
+				memcpy(&c->addr6_ll_seen, &in6_addr->sin6_addr,
+				       sizeof(c->addr6_seen));
+				v6 = 1;
+			}
+		}
+	}
+
+	freeifaddrs(ifaddr);
+
+	if (v4 < c->v4 || v6 < c->v6)
+		goto out;
+
+	if (v4 && !c->mask4) {
+		if (IN_CLASSA(ntohl(c->addr4)))
+			c->mask4 = htonl(IN_CLASSA_NET);
+		else if (IN_CLASSB(ntohl(c->addr4)))
+			c->mask4 = htonl(IN_CLASSB_NET);
+		else if (IN_CLASSC(ntohl(c->addr4)))
+			c->mask4 = htonl(IN_CLASSC_NET);
+		else
+			c->mask4 = 0xffffffff;
+	}
+
+	if (!memcmp(c->mac, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN)) {
+		s = socket(AF_INET, SOCK_DGRAM, 0);
+		if (s < 0) {
+			perror("socket SIOCGIFHWADDR");
+			goto out;
+		}
+
+		strncpy(ifr.ifr_name, c->ifn, IF_NAMESIZE);
+		if (ioctl(s, SIOCGIFHWADDR, &ifr) < 0) {
+			perror("SIOCGIFHWADDR");
+			goto out;
+		}
+
+		close(s);
+		memcpy(c->mac, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+	}
+
+	memset(&c->mac_guest, 0xff, sizeof(c->mac_guest));
+
+	return;
+out:
+	err("Couldn't get addresses for routable interface");
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * get_dns() - Get nameserver addresses from local /etc/resolv.conf
+ * @c:		Execution context
+ *
+ * Nameservers are only read if no DNS address is set and DNS information is
+ * not disabled. The search list is only read, from the first "search" line,
+ * if none is set and it's not disabled. A warning is printed if nameservers
+ * were wanted, but none could be found.
+ */
+static void get_dns(struct ctx *c)
+{
+	int dns4_set, dns6_set, dnss_set, dns_set;
+	struct in6_addr *dns6 = &c->dns6[0];
+	struct fqdn *s = c->dns_search;
+	uint32_t *dns4 = &c->dns4[0];
+	char buf[BUFSIZ], *p, *end;
+	FILE *r;
+
+	dns4_set = !!*dns4;
+	/* dns6 is a pointer to the address already: don't take its address */
+	dns6_set = !IN6_IS_ADDR_UNSPECIFIED(dns6);
+	dnss_set = !!*s->n || c->no_dns_search;
+	dns_set = dns4_set || dns6_set || c->no_dns;
+
+	if (dns_set && dnss_set)
+		return;
+
+	r = fopen("/etc/resolv.conf", "r");
+	if (!r)
+		goto out;
+
+	while (fgets(buf, BUFSIZ, r)) {
+		if (!dns_set && strstr(buf, "nameserver ") == buf) {
+			p = strrchr(buf, ' ');
+			if (!p)
+				continue;
+
+			/* Strip zone index and newline */
+			end = strpbrk(buf, "%\n");
+			if (end)
+				*end = 0;
+
+			if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) &&
+			    inet_pton(AF_INET, p + 1, dns4))
+				dns4++;
+
+			if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) &&
+			    inet_pton(AF_INET6, p + 1, dns6))
+				dns6++;
+		} else if (!dnss_set && strstr(buf, "search ") == buf &&
+			   s == c->dns_search) {
+			end = strpbrk(buf, "\n");
+			if (end)
+				*end = 0;
+
+			/* First token is the "search" keyword itself */
+			p = strtok(buf, " \t");
+			while ((p = strtok(NULL, " \t")) &&
+			       s - c->dns_search < ARRAY_SIZE(c->dns_search)) {
+				/* Same bound as before, but unlike strncpy(),
+				 * this always terminates the string.
+				 */
+				snprintf(s->n, sizeof(c->dns_search[0]),
+					 "%s", p);
+				s++;
+			}
+		}
+	}
+
+	fclose(r);
+
+out:
+	if (!dns_set && dns4 == c->dns4 && dns6 == c->dns6)
+		warn("Couldn't get any nameserver address");
+}
+
+/**
+ * usage() - Print usage and exit
+ * @name:	Executable name
+ *
+ * The text printed depends on @name: the pasta variant is used if it contains
+ * "pasta" or "passt4netns". This function doesn't return: it always exits
+ * with EXIT_FAILURE.
+ */
+static void usage(const char *name)
+{
+	int pasta = strstr(name, "pasta") || strstr(name, "passt4netns");
+
+	if (pasta) {
+		info("Usage: %s [OPTION]... [TARGET_PID]", name);
+		info("");
+		info("Without TARGET_PID, enter a user and network namespace,");
+		info("run the default shell and connect it via pasta.");
+	} else {
+		info("Usage: %s [OPTION]...", name);
+	}
+	info("");
+
+	info(" -d, --debug Be verbose, don't run in background");
+	info(" -q, --quiet Don't print informational messages");
+	info(" -f, --foreground Don't run in background");
+	info(" default: run in background if started from a TTY");
+	info(" -e, --stderr Log to stderr too");
+	info(" default: log to system logger only if started from a TTY");
+	info(" -h, --help Display this help message and exit");
+
+	if (pasta) {
+		info(" -I, --ns-ifname NAME namespace interface name");
+		info(" default: same interface name as external one");
+	} else {
+		info(" -s, --socket PATH UNIX domain socket path");
+		info(" default: probe free path starting from "
+		     UNIX_SOCK_PATH, 1);
+	}
+
+	info(" -p, --pcap [FILE] Log tap-facing traffic to pcap file");
+	info(" if FILE is not given, log to:");
+
+	if (pasta)
+		info(" pasta_ISO8601-TIMESTAMP_INSTANCE-NUMBER.pcap");
+	else
+		info(" passt_ISO8601-TIMESTAMP_INSTANCE-NUMBER.pcap");
+
+	info(" -m, --mtu MTU Assign MTU via DHCP/NDP");
+	info(" default: no MTU assigned via DHCP/NDP options");
+	info(" -a, --address ADDR Assign IPv4 or IPv6 address ADDR");
+	info(" can be specified zero to two times (for IPv4 and IPv6)");
+	info(" default: use addresses from interface with default route");
+	info(" -n, --netmask MASK Assign IPv4 MASK, dot-decimal or bits");
+	info(" default: netmask from matching address on the host");
+	info(" -M, --mac-addr ADDR Use source MAC address ADDR");
+	info(" default: MAC address from interface with default route");
+	info(" -g, --gateway ADDR Pass IPv4 or IPv6 address as gateway");
+	info(" default: gateway from interface with default route");
+	info(" -i, --interface NAME Interface for addresses and routes");
+	info(" default: interface with first default route");
+	info(" -D, --dns ADDR Pass IPv4 or IPv6 address as DNS");
+	info(" can be specified multiple times");
+	info(" a single, empty option disables DNS information");
+	if (pasta)
+		info(" default: don't send any addresses");
+	else
+		info(" default: use addresses from /etc/resolv.conf");
+
+	info(" -S, --search LIST Space-separated list, search domains");
+	info(" a single, empty option disables the DNS search list");
+	if (pasta)
+		info(" default: don't send any search list");
+	else
+		info(" default: use search list from /etc/resolv.conf");
+
+	info(" --no-tcp Disable TCP protocol handler");
+	info(" --no-udp Disable UDP protocol handler");
+	info(" --no-icmp Disable ICMP/ICMPv6 protocol handler");
+	info(" --no-dhcp Disable DHCP server");
+	info(" --no-ndp Disable NDP responses");
+	info(" --no-dhcpv6 Disable DHCPv6 server");
+	info(" --no-ra Disable router advertisements");
+	info(" -4, --ipv4-only Enable IPv4 operation only");
+	info(" -6, --ipv6-only Enable IPv6 operation only");
+
+	if (!pasta) {
+		/* passt: forwarding to guest only */
+		info(" -t, --tcp-ports SPEC TCP port forwarding to guest");
+		info(" can be specified multiple times");
+		info(" SPEC can be:");
+		info(" 'none': don't forward any ports");
+		info(" 'all': forward all unbound, non-ephemeral ports");
+		info(" a comma-separated of ports, optionally ranged with '-'");
+		info(" and optional target ports after ':'. Examples:");
+		info(" -t 22 Forward local port 22 to 22 on guest");
+		info(" -t 22:23 Forward local port 22 to 23 on guest");
+		info(" -t 22,25 Forward ports 22, 25 to ports 22, 25");
+		info(" -t 22-80 Forward ports 22 to 80");
+		info(" -t 22-80:32-90 Forward ports 22 to 80 to");
+		info(" corresponding port numbers plus 10");
+		info(" default: none");
+		info(" -u, --udp-ports SPEC UDP port forwarding to guest");
+		info(" SPEC is as described for TCP above");
+		info(" default: none");
+		info(" unless overridden, UDP ports with numbers corresponding");
+		info(" to forwarded TCP port numbers are forwarded too, and");
+		info(" IPv6 bound ports are also forwarded for IPv4");
+
+		exit(EXIT_FAILURE);
+	}
+
+	/* pasta: forwarding to namespace, and back to init namespace */
+	info(" -t, --tcp-ports SPEC TCP port forwarding to namespace");
+	info(" can be specified multiple times");
+	info(" SPEC can be:");
+	info(" 'none': don't forward any ports");
+	info(" 'auto': forward all ports currently bound in namespace");
+	info(" a comma-separated of ports, optionally ranged with '-'");
+	info(" and optional target ports after ':'. Examples:");
+	info(" -t 22 Forward local port 22 to port 22 in netns");
+	info(" -t 22:23 Forward local port 22 to port 23");
+	info(" -t 22,25 Forward ports 22, 25 to ports 22, 25");
+	info(" -t 22-80 Forward ports 22 to 80");
+	info(" -t 22-80:32-90 Forward ports 22 to 80 to");
+	info(" corresponding port numbers plus 10");
+	info(" default: auto");
+	info(" IPv6 bound ports are also forwarded for IPv4");
+	info(" -u, --udp-ports SPEC UDP port forwarding to namespace");
+	info(" SPEC is as described for TCP above");
+	info(" default: auto");
+	info(" unless overridden, UDP ports with numbers corresponding");
+	info(" to forwarded TCP port numbers are forwarded too, and");
+	info(" IPv6 bound ports are also forwarded for IPv4");
+	info(" -T, --tcp-ns SPEC TCP port forwarding to init namespace");
+	info(" SPEC is as described above");
+	info(" default: auto");
+	info(" -U, --udp-ns SPEC UDP port forwarding to init namespace");
+	info(" SPEC is as described above");
+	info(" default: auto");
+
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * conf_print() - Print summary of the configuration in use
+ * @c:		Execution context
+ *
+ * Prints interface names, then the IPv4 and IPv6 parameters handed out to
+ * the guest or namespace, depending on which IP versions and which services
+ * are enabled.
+ */
+void conf_print(struct ctx *c)
+{
+	char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN];
+	int i;
+
+	if (c->mode == MODE_PASTA) {
+		info("Outbound interface: %s, namespace interface: %s",
+		     c->ifn, c->pasta_ifn);
+	} else {
+		info("Outbound interface: %s", c->ifn);
+	}
+
+	if (c->v4) {
+		info("ARP:");
+		info(" address: %02x:%02x:%02x:%02x:%02x:%02x",
+		     c->mac[0], c->mac[1], c->mac[2],
+		     c->mac[3], c->mac[4], c->mac[5]);
+
+		if (!c->no_dhcp) {
+			info("DHCP:");
+			info(" assign: %s",
+			     inet_ntop(AF_INET, &c->addr4, buf4, sizeof(buf4)));
+			info(" mask: %s",
+			     inet_ntop(AF_INET, &c->mask4, buf4, sizeof(buf4)));
+			info(" router: %s",
+			     inet_ntop(AF_INET, &c->gw4, buf4, sizeof(buf4)));
+		}
+	}
+
+	if (!c->no_dns && !(c->no_dhcp && c->no_ndp && c->no_dhcpv6)) {
+		for (i = 0; c->dns4[i]; i++) {
+			if (!i)
+				info(" DNS:");
+			inet_ntop(AF_INET, &c->dns4[i], buf4, sizeof(buf4));
+			info(" %s", buf4);
+		}
+	}
+
+	if (!c->no_dns_search && !(c->no_dhcp && c->no_ndp && c->no_dhcpv6)) {
+		for (i = 0; *c->dns_search[i].n; i++) {
+			if (!i)
+				info(" search:");
+			info(" %s", c->dns_search[i].n);
+		}
+	}
+
+	if (c->v6) {
+		/* Name the section after what's actually enabled */
+		if (!c->no_ndp && !c->no_dhcpv6)
+			info("NDP/DHCPv6:");
+		else if (!c->no_ndp)
+			info("NDP:");
+		else if (!c->no_dhcpv6)
+			info("DHCPv6:");
+		else
+			return;
+
+		info(" assign: %s",
+		     inet_ntop(AF_INET6, &c->addr6, buf6, sizeof(buf6)));
+		info(" router: %s",
+		     inet_ntop(AF_INET6, &c->gw6, buf6, sizeof(buf6)));
+
+		for (i = 0; !IN6_IS_ADDR_UNSPECIFIED(&c->dns6[i]); i++) {
+			if (!i)
+				info(" DNS:");
+			inet_ntop(AF_INET6, &c->dns6[i], buf6, sizeof(buf6));
+			info(" %s", buf6);
+		}
+
+		for (i = 0; *c->dns_search[i].n; i++) {
+			if (!i)
+				info(" search:");
+			info(" %s", c->dns_search[i].n);
+		}
+	}
+}
+
+/**
+ * conf() - Process command-line arguments and set configuration
+ * @c:		Execution context
+ * @argc:	Argument count
+ * @argv:	Options, plus target PID for pasta mode
+ *
+ * Options are parsed first, then routes, addresses and DNS information are
+ * filled in for whatever was not given, port maps are set up for pasta mode,
+ * and the resulting configuration is printed. Any invalid or conflicting
+ * option causes usage() to be called, which exits the process.
+ */
+void conf(struct ctx *c, int argc, char **argv)
+{
+	struct option options[] = {
+		{"debug",	no_argument,		NULL,		'd' },
+		{"quiet",	no_argument,		NULL,		'q' },
+		{"foreground",	no_argument,		NULL,		'f' },
+		{"stderr",	no_argument,		&c->stderr,	1 },
+		{"help",	no_argument,		NULL,		'h' },
+		{"socket",	required_argument,	NULL,		's' },
+		{"ns-ifname",	required_argument,	NULL,		'I' },
+		{"pcap",	optional_argument,	NULL,		'p' },
+		{"mtu",		required_argument,	NULL,		'm' },
+		{"address",	required_argument,	NULL,		'a' },
+		{"netmask",	required_argument,	NULL,		'n' },
+		{"mac-addr",	required_argument,	NULL,		'M' },
+		{"gateway",	required_argument,	NULL,		'g' },
+		{"interface",	required_argument,	NULL,		'i' },
+		{"dns",		optional_argument,	NULL,		'D' },
+		{"search",	optional_argument,	NULL,		'S' },
+		{"no-tcp",	no_argument,		&c->no_tcp,	1 },
+		{"no-udp",	no_argument,		&c->no_udp,	1 },
+		{"no-icmp",	no_argument,		&c->no_icmp,	1 },
+		{"no-dhcp",	no_argument,		&c->no_dhcp,	1 },
+		{"no-dhcpv6",	no_argument,		&c->no_dhcpv6,	1 },
+		{"no-ndp",	no_argument,		&c->no_ndp,	1 },
+		{"no-ra",	no_argument,		&c->no_ra,	1 },
+		/* Store 1, same as -4 and -6 do below */
+		{"ipv4-only",	no_argument,		&c->v4,		1 },
+		{"ipv6-only",	no_argument,		&c->v6,		1 },
+		{"tcp-ports",	required_argument,	NULL,		't' },
+		{"udp-ports",	required_argument,	NULL,		'u' },
+		{"tcp-ns",	required_argument,	NULL,		'T' },
+		{"udp-ns",	required_argument,	NULL,		'U' },
+		{ 0 },
+	};
+	struct get_bound_ports_ns_arg ns_ports_arg = { .c = c };
+	enum conf_port_type tcp_tap = 0, tcp_init = 0;
+	enum conf_port_type udp_tap = 0, udp_init = 0;
+	struct fqdn *dnss = c->dns_search;
+	struct in6_addr *dns6 = c->dns6;
+	int name, ret, mask, b, i;
+	uint32_t *dns4 = c->dns4;
+	char *end;
+
+	do {
+		enum conf_port_type *set;
+		const char *optstring;
+
+		if (c->mode == MODE_PASST)
+			optstring = "dqfehs:p::m:a:n:M:g:i:D::S::46t:u:";
+		else
+			optstring = "dqfehI:p::m:a:n:M:g:i:D::S::46t:u:T:U:";
+
+		name = getopt_long(argc, argv, optstring, options, NULL);
+
+		/* Optional arguments given as separate word: take the next
+		 * word, unless it's an option, or, in pasta mode, a number,
+		 * which is assumed to be the target PID.
+		 */
+		if ((name == 'p' || name == 'D' || name == 'S') && !optarg &&
+		    optind < argc && *argv[optind] && *argv[optind] != '-') {
+			if (c->mode == MODE_PASTA) {
+				char *endptr;
+
+				(void)strtol(argv[optind], &endptr, 10);
+				if (*endptr)
+					optarg = argv[optind++];
+			} else {
+				optarg = argv[optind++];
+			}
+		}
+
+		switch (name) {
+		case -1:
+		case 0:
+			break;
+		case 'd':
+			if (c->debug) {
+				err("Multiple --debug options given");
+				usage(argv[0]);
+			}
+
+			if (c->quiet) {
+				err("Either --debug or --quiet");
+				usage(argv[0]);
+			}
+
+			c->debug = 1;
+			c->foreground = 1;
+			break;
+		case 'q':
+			if (c->quiet) {
+				err("Multiple --quiet options given");
+				usage(argv[0]);
+			}
+
+			if (c->debug) {
+				err("Either --debug or --quiet");
+				usage(argv[0]);
+			}
+
+			c->quiet = 1;
+			break;
+		case 'f':
+			if (c->foreground && !c->debug) {
+				err("Multiple --foreground options given");
+				usage(argv[0]);
+			}
+
+			c->foreground = 1;
+			break;
+		case 'e':
+			/* --stderr is set by getopt_long() via flag pointer */
+			c->stderr = 1;
+			break;
+		case '?':
+		case 'h':
+			usage(argv[0]);
+			break;
+		case 's':
+			if (*c->sock_path) {
+				err("Multiple --socket options given");
+				usage(argv[0]);
+			}
+
+			ret = snprintf(c->sock_path, sizeof(c->sock_path), "%s",
+				       optarg);
+			if (ret <= 0 || ret >= (int)sizeof(c->sock_path)) {
+				err("Invalid socket path: %s", optarg);
+				usage(argv[0]);
+			}
+			break;
+		case 'I':
+			if (*c->pasta_ifn) {
+				err("Multiple --ns-ifname options given");
+				usage(argv[0]);
+			}
+
+			ret = snprintf(c->pasta_ifn, sizeof(c->pasta_ifn), "%s",
+				       optarg);
+			if (ret <= 0 || ret >= (int)sizeof(c->pasta_ifn)) {
+				err("Invalid interface name: %s", optarg);
+				usage(argv[0]);
+			}
+			break;
+		case 'p':
+			if (*c->pcap) {
+				err("Multiple --pcap options given");
+				usage(argv[0]);
+			}
+
+			if (!optarg) {
+				/* Enabled, with no file name given */
+				*c->pcap = 1;
+				break;
+			}
+
+			ret = snprintf(c->pcap, sizeof(c->pcap), "%s", optarg);
+			if (ret <= 0 || ret >= (int)sizeof(c->pcap)) {
+				err("Invalid pcap path: %s", optarg);
+				usage(argv[0]);
+			}
+			break;
+		case 'm':
+			if (c->mtu) {
+				err("Multiple --mtu options given");
+				usage(argv[0]);
+			}
+
+			errno = 0;
+			c->mtu = strtol(optarg, NULL, 0);
+			if (c->mtu < ETH_MIN_MTU || c->mtu > (int)ETH_MAX_MTU ||
+			    errno) {
+				err("Invalid MTU: %s", optarg);
+				usage(argv[0]);
+			}
+			break;
+		case 'a':
+			if (IN6_IS_ADDR_UNSPECIFIED(&c->addr6)		&&
+			    inet_pton(AF_INET6, optarg, &c->addr6)	&&
+			    !IN6_IS_ADDR_UNSPECIFIED(&c->addr6)		&&
+			    !IN6_IS_ADDR_LOOPBACK(&c->addr6)		&&
+			    !IN6_IS_ADDR_V4MAPPED(&c->addr6)		&&
+			    !IN6_IS_ADDR_V4COMPAT(&c->addr6)		&&
+			    !IN6_IS_ADDR_MULTICAST(&c->addr6))
+				break;
+
+			/* c->addr4 is in network order, INADDR_LOOPBACK and
+			 * IN_MULTICAST() use host order.
+			 */
+			if (c->addr4 == INADDR_ANY			&&
+			    inet_pton(AF_INET, optarg, &c->addr4)	&&
+			    c->addr4 != INADDR_ANY			&&
+			    c->addr4 != INADDR_BROADCAST		&&
+			    c->addr4 != htonl(INADDR_LOOPBACK)		&&
+			    !IN_MULTICAST(ntohl(c->addr4)))
+				break;
+
+			err("Invalid address: %s", optarg);
+			usage(argv[0]);
+			break;
+		case 'n':
+			if (inet_pton(AF_INET, optarg, &c->mask4))
+				break;
+
+			errno = 0;
+			mask = strtol(optarg, &end, 0);
+			if (end != optarg && !*end &&
+			    mask >= 0 && mask <= 32 && !errno) {
+				/* Shifting by the full width is undefined */
+				if (mask)
+					c->mask4 = htonl(0xffffffff <<
+							 (32 - mask));
+				else
+					c->mask4 = 0;
+				break;
+			}
+
+			err("Invalid netmask: %s", optarg);
+			usage(argv[0]);
+			break;
+		case 'M':
+			/* Bytes are read at fixed offsets, three characters
+			 * apart: make sure the last offset is in the string.
+			 */
+			if (strlen(optarg) < (ETH_ALEN - 1) * 3 + 1) {
+				err("Invalid MAC address: %s", optarg);
+				usage(argv[0]);
+			}
+
+			for (i = 0; i < ETH_ALEN; i++) {
+				errno = 0;
+				b = strtol(optarg + i * 3, NULL, 16);
+				if (b < 0 || b > UCHAR_MAX || errno) {
+					err("Invalid MAC address: %s", optarg);
+					usage(argv[0]);
+				}
+				c->mac[i] = b;
+			}
+			break;
+		case 'g':
+			if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6)		&&
+			    inet_pton(AF_INET6, optarg, &c->gw6)	&&
+			    !IN6_IS_ADDR_UNSPECIFIED(&c->gw6)		&&
+			    !IN6_IS_ADDR_LOOPBACK(&c->gw6))
+				break;
+
+			/* Network order, see address option above */
+			if (c->gw4 == INADDR_ANY			&&
+			    inet_pton(AF_INET, optarg, &c->gw4)		&&
+			    c->gw4 != INADDR_ANY			&&
+			    c->gw4 != INADDR_BROADCAST			&&
+			    c->gw4 != htonl(INADDR_LOOPBACK))
+				break;
+
+			err("Invalid gateway address: %s", optarg);
+			usage(argv[0]);
+			break;
+		case 'i':
+			if (*c->ifn) {
+				err("Redundant interface: %s", optarg);
+				usage(argv[0]);
+			}
+
+			strncpy(c->ifn, optarg, IFNAMSIZ - 1);
+			break;
+		case 'D':
+			if (c->no_dns ||
+			    (!optarg && (dns4 - c->dns4 || dns6 - c->dns6))) {
+				err("Empty and non-empty DNS options given");
+				usage(argv[0]);
+			}
+
+			if (!optarg) {
+				c->no_dns = 1;
+				break;
+			}
+
+			if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) &&
+			    inet_pton(AF_INET, optarg, dns4)) {
+				dns4++;
+				break;
+			}
+
+			if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) &&
+			    inet_pton(AF_INET6, optarg, dns6)) {
+				dns6++;
+				break;
+			}
+
+			err("Cannot use DNS address %s", optarg);
+			usage(argv[0]);
+			break;
+		case 'S':
+			if (c->no_dns_search ||
+			    (!optarg && dnss != c->dns_search)) {
+				err("Empty and non-empty DNS search given");
+				usage(argv[0]);
+			}
+
+			if (!optarg) {
+				c->no_dns_search = 1;
+				break;
+			}
+
+			if (dnss - c->dns_search < ARRAY_SIZE(c->dns_search)) {
+				ret = snprintf(dnss->n, sizeof(*c->dns_search),
+					       "%s", optarg);
+				dnss++;
+
+				if (ret > 0 &&
+				    ret < (int)sizeof(*c->dns_search))
+					break;
+			}
+
+			err("Cannot use DNS search domain %s", optarg);
+			usage(argv[0]);
+			break;
+		case '4':
+			c->v4 = 1;
+			break;
+		case '6':
+			c->v6 = 1;
+			break;
+		case 't':
+		case 'u':
+		case 'T':
+		case 'U':
+			if (name == 't')
+				set = &tcp_tap;
+			else if (name == 'T')
+				set = &tcp_init;
+			else if (name == 'u')
+				set = &udp_tap;
+			else
+				set = &udp_init;
+
+			if (conf_ports(c, name, optarg, set))
+				usage(argv[0]);
+
+			break;
+		default:
+			break;
+		}
+	} while (name != -1);
+
+	if (c->mode == MODE_PASTA && optind + 1 == argc) {
+		/* Don't be fooled by errno left over from earlier calls */
+		errno = 0;
+		c->pasta_pid = strtol(argv[optind], NULL, 0);
+		if (c->pasta_pid < 0 || errno)
+			usage(argv[0]);
+	} else if (optind != argc) {
+		usage(argv[0]);
+	}
+
+	if (c->v4 && c->v6) {
+		err("Options ipv4-only and ipv6-only are mutually exclusive");
+		usage(argv[0]);
+	}
+
+	if (c->v4 || c->v6) {
+		if (!c->v4)
+			c->no_dhcp = 1;
+
+		if (!c->v6) {
+			c->no_ndp = 1;
+			c->no_dhcpv6 = 1;
+		}
+	}
+
+	get_routes(c);
+	get_addrs(c);
+
+	/* In pasta mode, no DNS information is sent unless explicitly given */
+	if (c->mode == MODE_PASTA && dns4 == c->dns4 && dns6 == c->dns6)
+		c->no_dns = 1;
+	if (c->mode == MODE_PASTA && dnss == c->dns_search)
+		c->no_dns_search = 1;
+	get_dns(c);
+
+	if (!*c->pasta_ifn)
+		strncpy(c->pasta_ifn, c->ifn, IFNAMSIZ);
+
+	if (c->mode == MODE_PASTA) {
+		/* "auto" is the default if no port option was given */
+		if (!tcp_tap || tcp_tap == PORT_AUTO) {
+			ns_ports_arg.proto = IPPROTO_TCP;
+			NS_CALL(get_bound_ports_ns, &ns_ports_arg);
+		}
+		if (!udp_tap || udp_tap == PORT_AUTO) {
+			ns_ports_arg.proto = IPPROTO_UDP;
+			NS_CALL(get_bound_ports_ns, &ns_ports_arg);
+		}
+		if (!tcp_init || tcp_init == PORT_AUTO)
+			get_bound_ports(c, IPPROTO_TCP);
+		if (!udp_init || udp_init == PORT_AUTO)
+			get_bound_ports(c, IPPROTO_UDP);
+	}
+
+	conf_print(c);
+}
diff --git a/conf.h b/conf.h
new file mode 100644
index 0000000..1bb2e73
--- /dev/null
+++ b/conf.h
@@ -0,0 +1 @@
+void conf(struct ctx *c, int argc, char **argv);
diff --git a/dhcp.c b/dhcp.c
index ed5df27..04b04af 100644
--- a/dhcp.c
+++ b/dhcp.c
@@ -272,6 +272,9 @@ int dhcp(struct ctx *c, struct ethhdr *eh, size_t len)
if (uh->dest != htons(67))
return 0;
+ if (c->no_dhcp)
+ return 1;
+
mlen = len - sizeof(*eh) - iph->ihl * 4 - sizeof(*uh);
if (mlen != ntohs(uh->len) - sizeof(*uh) ||
mlen < offsetof(struct msg, o) ||
@@ -305,6 +308,12 @@ int dhcp(struct ctx *c, struct ethhdr *eh, size_t len)
*(unsigned long *)opts[3].s = c->gw4;
*(unsigned long *)opts[54].s = c->gw4;
+ if (c->mtu) {
+ opts[26].slen = 2;
+ opts[26].s[0] = c->mtu / 256;
+ opts[26].s[1] = c->mtu % 256;
+ }
+
for (i = 0, opts[6].slen = 0; c->dns4[i]; i++) {
((uint32_t *)opts[6].s)[i] = c->dns4[i];
opts[6].slen += sizeof(uint32_t);
diff --git a/dhcpv6.c b/dhcpv6.c
index c4f7a9a..19c7a62 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -461,6 +461,9 @@ int dhcpv6(struct ctx *c, struct ethhdr *eh, size_t len)
if (!uh || proto != IPPROTO_UDP || uh->dest != htons(547))
return 0;
+ if (c->no_dhcpv6)
+ return 1;
+
if (!IN6_IS_ADDR_MULTICAST(&ip6h->daddr))
return -1;
diff --git a/ndp.c b/ndp.c
index b676825..74445ad 100644
--- a/ndp.c
+++ b/ndp.c
@@ -64,6 +64,9 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
ih->icmp6_type < RS || ih->icmp6_type > NA)
return 0;
+ if (c->no_ndp)
+ return 1;
+
ehr = (struct ethhdr *)buf;
ip6hr = (struct ipv6hdr *)(ehr + 1);
ihr = (struct icmp6hdr *)(ip6hr + 1);
@@ -91,6 +94,9 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
size_t len = 0;
int i, n;
+ if (c->no_ra)
+ return 1;
+
info("NDP: received RS, sending RA");
ihr->icmp6_type = RA;
ihr->icmp6_code = 0;
@@ -110,6 +116,14 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
memcpy(p, &c->addr6, 8); /* prefix */
p += 16;
+ if (c->mtu) {
+ *p++ = 5; /* type */
+ *p++ = 1; /* length */
+ p += 2; /* reserved */
+ *(uint32_t *)p = htonl(c->mtu); /* MTU */
+ p += 4;
+ }
+
for (n = 0; !IN6_IS_ADDR_UNSPECIFIED(&c->dns6[n]); n++);
if (n) {
*p++ = 25; /* RDNSS */
diff --git a/passt.c b/passt.c
index ff850e7..0f8ac77 100644
--- a/passt.c
+++ b/passt.c
@@ -25,13 +25,13 @@
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
-#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
#include <sys/resource.h>
#include <sys/uio.h>
-#include <ifaddrs.h>
+#include <sys/wait.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
-#include <arpa/inet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
@@ -46,8 +46,6 @@
#include <netdb.h>
#include <string.h>
#include <errno.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <time.h>
#include <syslog.h>
#include <sys/stat.h>
@@ -60,6 +58,7 @@
#include "udp.h"
#include "pcap.h"
#include "tap.h"
+#include "conf.h"
#define EPOLL_EVENTS 10
@@ -68,7 +67,6 @@
char pkt_buf [PKT_BUF_BYTES];
-#ifdef DEBUG
char *ip_proto_str[IPPROTO_SCTP + 1] = {
[IPPROTO_ICMP] = "ICMP",
[IPPROTO_TCP] = "TCP",
@@ -76,318 +74,6 @@ char *ip_proto_str[IPPROTO_SCTP + 1] = {
[IPPROTO_ICMPV6] = "ICMPV6",
[IPPROTO_SCTP] = "SCTP",
};
-#endif
-
-/**
- * struct nl_request - Netlink request filled and sent by get_routes()
- * @nlh: Netlink message header
- * @rtm: Routing Netlink message
- */
-struct nl_request {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
-};
-
-/**
- * get_routes() - Get default route and fill in routable interface name
- * @c: Execution context
- */
-static void get_routes(struct ctx *c)
-{
- struct nl_request req = {
- .nlh.nlmsg_type = RTM_GETROUTE,
- .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
- .nlh.nlmsg_len = sizeof(struct nl_request),
- .nlh.nlmsg_seq = 1,
- .rtm.rtm_family = AF_INET,
- .rtm.rtm_table = RT_TABLE_MAIN,
- .rtm.rtm_scope = RT_SCOPE_UNIVERSE,
- .rtm.rtm_type = RTN_UNICAST,
- };
- struct sockaddr_nl addr = {
- .nl_family = AF_NETLINK,
- };
- struct nlmsghdr *nlh;
- struct rtattr *rta;
- struct rtmsg *rtm;
- char buf[BUFSIZ];
- int s, n, na;
-
- c->v6 = -1;
-
- s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (s < 0) {
- perror("netlink socket");
- goto out;
- }
-
- if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
- perror("netlink bind");
- goto out;
- }
-
-v6:
- if (send(s, &req, sizeof(req), 0) < 0) {
- perror("netlink send");
- goto out;
- }
-
- n = recv(s, &buf, sizeof(buf), 0);
- if (n < 0) {
- perror("netlink recv");
- goto out;
- }
-
- nlh = (struct nlmsghdr *)buf;
- for ( ; NLMSG_OK(nlh, n); nlh = NLMSG_NEXT(nlh, n)) {
- rtm = (struct rtmsg *)NLMSG_DATA(nlh);
-
- if (rtm->rtm_dst_len ||
- (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6))
- continue;
-
- rta = (struct rtattr *)RTM_RTA(rtm);
- na = RTM_PAYLOAD(nlh);
- for ( ; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
- if (rta->rta_type == RTA_GATEWAY &&
- rtm->rtm_family == AF_INET && !c->v4) {
- memcpy(&c->gw4, RTA_DATA(rta), sizeof(c->gw4));
- c->v4 = 1;
- }
-
- if (rta->rta_type == RTA_GATEWAY &&
- rtm->rtm_family == AF_INET6 && !c->v6) {
- memcpy(&c->gw6, RTA_DATA(rta), sizeof(c->gw6));
- c->v6 = 1;
- }
-
- if (rta->rta_type == RTA_OIF && !*c->ifn) {
- if_indextoname(*(unsigned *)RTA_DATA(rta),
- c->ifn);
- }
- }
-
- if (nlh->nlmsg_type == NLMSG_DONE)
- break;
- }
-
- if (c->v6 == -1) {
- c->v6 = 0;
- req.rtm.rtm_family = AF_INET6;
- req.nlh.nlmsg_seq++;
- recv(s, &buf, sizeof(buf), 0);
- goto v6;
- }
-
-out:
- close(s);
-
- if (!(c->v4 || c->v6) || !*c->ifn) {
- err("No routing information");
- exit(EXIT_FAILURE);
- }
-}
-
-/**
- * get_addrs() - Fetch MAC, IP addresses, masks of external routable interface
- * @c: Execution context
- */
-static void get_addrs(struct ctx *c)
-{
- struct ifreq ifr = {
- .ifr_addr.sa_family = AF_INET,
- };
- struct ifaddrs *ifaddr, *ifa;
- int s, v4 = 0, v6 = 0;
-
- if (getifaddrs(&ifaddr) == -1) {
- perror("getifaddrs");
- goto out;
- }
-
- for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
- struct sockaddr_in *in_addr;
- struct sockaddr_in6 *in6_addr;
-
- if (strcmp(ifa->ifa_name, c->ifn))
- continue;
-
- if (!ifa->ifa_addr)
- continue;
-
- if (ifa->ifa_addr->sa_family == AF_INET && !v4) {
- in_addr = (struct sockaddr_in *)ifa->ifa_addr;
- c->addr4_seen = c->addr4 = in_addr->sin_addr.s_addr;
- in_addr = (struct sockaddr_in *)ifa->ifa_netmask;
- c->mask4 = in_addr->sin_addr.s_addr;
- v4 = 1;
- } else if (ifa->ifa_addr->sa_family == AF_INET6 && !v6) {
- in6_addr = (struct sockaddr_in6 *)ifa->ifa_addr;
- memcpy(&c->addr6, &in6_addr->sin6_addr,
- sizeof(c->addr6));
- memcpy(&c->addr6_seen, &in6_addr->sin6_addr,
- sizeof(c->addr6_seen));
- memcpy(&c->addr6_ll_seen, &in6_addr->sin6_addr,
- sizeof(c->addr6_seen));
- v6 = 1;
- }
-
- if (v4 == c->v4 && v6 == c->v6)
- break;
- }
-
- freeifaddrs(ifaddr);
-
- if (v4 != c->v4 || v6 != c->v6)
- goto out;
-
- s = socket(AF_INET, SOCK_DGRAM, 0);
- if (s < 0) {
- perror("socket SIOCGIFHWADDR");
- goto out;
- }
-
- strncpy(ifr.ifr_name, c->ifn, IF_NAMESIZE);
- if (ioctl(s, SIOCGIFHWADDR, &ifr) < 0) {
- perror("SIOCGIFHWADDR");
- goto out;
- }
-
- close(s);
- memcpy(c->mac, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
-
- return;
-out:
- err("Couldn't get addresses for routable interface");
- exit(EXIT_FAILURE);
-}
-
-/**
- * get_dns() - Get nameserver addresses from local /etc/resolv.conf
- * @c: Execution context
- */
-static void get_dns(struct ctx *c)
-{
- struct in6_addr *dns6 = &c->dns6[0];
- struct fqdn *s = c->dns_search;
- uint32_t *dns4 = &c->dns4[0];
- char buf[BUFSIZ], *p, *end;
- FILE *r;
-
- r = fopen("/etc/resolv.conf", "r");
- while (fgets(buf, BUFSIZ, r)) {
- if (strstr(buf, "nameserver ") == buf) {
- p = strrchr(buf, ' ');
- if (!p)
- continue;
-
- end = strpbrk(buf, "%\n");
- if (end)
- *end = 0;
-
- if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) &&
- inet_pton(AF_INET, p + 1, dns4))
- dns4++;
-
- if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) &&
- inet_pton(AF_INET6, p + 1, dns6))
- dns6++;
- } else if (strstr(buf, "search ") == buf &&
- s == c->dns_search) {
- end = strpbrk(buf, "\n");
- if (end)
- *end = 0;
-
- p = strtok(buf, " \t");
- while ((p = strtok(NULL, " \t")) &&
- s - c->dns_search < ARRAY_SIZE(c->dns_search)) {
- strncpy(s->n, p, sizeof(c->dns_search[0]));
- s++;
- }
- }
- }
-
- fclose(r);
-
- if (dns4 == c->dns4 && dns6 == c->dns6)
- warn("Couldn't get any nameserver address");
-}
-
-/**
- * get_bound_ports_ns() - Get TCP and UDP ports bound in namespace
- * @arg: Execution context
- *
- * Return: 0
- */
-static int get_bound_ports_ns(void *arg)
-{
- struct ctx *c = (struct ctx *)arg;
-
- ns_enter(c->pasta_pid);
-
- if (c->v4) {
- procfs_scan_listen("tcp", c->tcp.port4_to_tap);
- procfs_scan_listen("tcp", c->udp.port4_to_tap);
- procfs_scan_listen("udp", c->udp.port4_to_tap);
-
- procfs_scan_listen("tcp", c->tcp.port4_to_ns);
- procfs_scan_listen("tcp", c->udp.port4_to_ns);
- procfs_scan_listen("udp", c->udp.port4_to_ns);
- }
-
- if (c->v6) {
- if (c->v4) {
- procfs_scan_listen("tcp6", c->tcp.port4_to_tap);
- procfs_scan_listen("tcp6", c->udp.port4_to_tap);
- procfs_scan_listen("udp6", c->udp.port4_to_tap);
-
- procfs_scan_listen("tcp6", c->tcp.port4_to_ns);
- procfs_scan_listen("tcp6", c->udp.port4_to_ns);
- procfs_scan_listen("udp6", c->udp.port4_to_ns);
- }
-
- procfs_scan_listen("tcp6", c->tcp.port6_to_tap);
- procfs_scan_listen("tcp6", c->udp.port6_to_tap);
- procfs_scan_listen("udp6", c->udp.port6_to_tap);
-
- procfs_scan_listen("tcp6", c->tcp.port6_to_ns);
- procfs_scan_listen("tcp6", c->udp.port6_to_ns);
- procfs_scan_listen("udp6", c->udp.port6_to_ns);
- }
-
- return 0;
-}
-
-/**
- * get_bound_ports() - Get maps of ports that should have bound sockets
- * @c: Execution context
- */
-static void get_bound_ports(struct ctx *c)
-{
- char ns_fn_stack[NS_FN_STACK_SIZE];
-
- clone(get_bound_ports_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
- CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, (void *)c);
-
- if (c->v4) {
- procfs_scan_listen("tcp", c->tcp.port4_to_init);
- procfs_scan_listen("tcp", c->udp.port4_to_init);
- procfs_scan_listen("udp", c->udp.port4_to_init);
- }
-
- if (c->v6) {
- if (c->v4) {
- procfs_scan_listen("tcp6", c->tcp.port4_to_init);
- procfs_scan_listen("tcp6", c->udp.port4_to_init);
- procfs_scan_listen("udp6", c->udp.port4_to_init);
- }
-
- procfs_scan_listen("tcp6", c->tcp.port6_to_init);
- procfs_scan_listen("tcp6", c->udp.port6_to_init);
- procfs_scan_listen("udp6", c->udp.port6_to_init);
-
- }
-}
/**
* sock_handler() - Event handler for L4 sockets
@@ -401,11 +87,12 @@ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
{
debug("%s packet from socket %i", IP_PROTO_STR(ref.proto), ref.s);
- if (ref.proto == IPPROTO_TCP)
+ if (!c->no_tcp && ref.proto == IPPROTO_TCP)
tcp_sock_handler( c, ref, events, now);
- else if (ref.proto == IPPROTO_UDP)
+ else if (!c->no_udp && ref.proto == IPPROTO_UDP)
udp_sock_handler( c, ref, events, now);
- else if (ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6)
+ else if (!c->no_icmp &&
+ (ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6))
icmp_sock_handler(c, ref, events, now);
}
@@ -416,17 +103,20 @@ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
*/
static void timer_handler(struct ctx *c, struct timespec *now)
{
- if (timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) {
+ if (!c->no_tcp &&
+ timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) {
tcp_timer(c, now);
c->tcp.timer_run = *now;
}
- if (timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) {
+ if (!c->no_udp &&
+ timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) {
udp_timer(c, now);
c->udp.timer_run = *now;
}
- if (timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) {
+ if (!c->no_icmp &&
+ timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) {
icmp_timer(c, now);
c->icmp.timer_run = *now;
}
@@ -445,68 +135,115 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
udp_update_l2_buf(eth_d, eth_s, ip_da);
}
+static int pasta_child_pid;
+
/**
- * usage_passt() - Print usage for "passt" mode and exit
- * @name: Executable name
+ * pasta_child_handler() - Exit once shell spawned by pasta_start_ns() exits
+ * @signal: Unused, handler deals with SIGCHLD only
*/
-void usage_passt(const char *name)
+static void pasta_child_handler(int signal)
{
- fprintf(stderr, "Usage: %s\n", name);
+ siginfo_t infop;
- exit(EXIT_FAILURE);
+ (void)signal;
+
+ if (!waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
+ if (infop.si_pid == pasta_child_pid)
+ exit(EXIT_SUCCESS);
+ }
}
/**
- * usage_pasta() - Print usage for "pasta" mode and exit
- * @name: Executable name
+ * pasta_start_ns() - Fork shell in new namespace if target PID is not given
+ * @c: Execution context
*/
-void usage_pasta(const char *name)
+static void pasta_start_ns(struct ctx *c)
{
- fprintf(stderr, "Usage: %s TARGET_PID\n", name);
+ char buf[BUFSIZ], *shell;
+ int euid = geteuid();
+ struct sigaction sa;
+ int fd;
+
+ c->foreground = 1;
+ if (!c->debug)
+ c->quiet = 1;
+
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = pasta_child_handler;
+ sigaction(SIGCHLD, &sa, NULL);
+
+ if ((c->pasta_pid = fork()) == -1) {
+ perror("fork");
+ exit(EXIT_FAILURE);
+ }
+
+ if ((pasta_child_pid = c->pasta_pid))
+ return;
+ if (unshare(CLONE_NEWNET | CLONE_NEWUSER)) {
+ perror("unshare");
+ exit(EXIT_FAILURE);
+ }
+
+ snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1);
+
+ fd = open("/proc/self/uid_map", O_WRONLY);
+ write(fd, buf, strlen(buf));
+ close(fd);
+
+ fd = open("/proc/self/setgroups", O_WRONLY);
+ write(fd, "deny", sizeof("deny"));
+ close(fd);
+
+ fd = open("/proc/self/gid_map", O_WRONLY);
+ write(fd, buf, strlen(buf));
+ close(fd);
+
+ shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh";
+ if (strstr(shell, "/bash"))
+ execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
+ else
+ execve(shell, ((char *[]) { shell, NULL }), environ);
+
+ perror("execve");
exit(EXIT_FAILURE);
}
/**
* main() - Entry point and main loop
* @argc: Argument count
- * @argv: Target PID for pasta mode
+ * @argv: Options, plus optional target PID for pasta mode
*
* Return: 0 once interrupted, non-zero on failure
*/
int main(int argc, char **argv)
{
- char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN], *log_name;
struct epoll_event events[EPOLL_EVENTS];
struct ctx c = { 0 };
struct rlimit limit;
struct timespec now;
+ char *log_name;
int nfds, i;
if (strstr(argv[0], "pasta") || strstr(argv[0], "passt4netns")) {
- if (argc != 2)
- usage_pasta(argv[0]);
-
- errno = 0;
- c.pasta_pid = strtol(argv[1], NULL, 0);
- if (c.pasta_pid < 0 || errno)
- usage_pasta(argv[0]);
-
c.mode = MODE_PASTA;
log_name = "pasta";
} else {
- if (argc != 1)
- usage_passt(argv[0]);
-
c.mode = MODE_PASST;
log_name = "passt";
- memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
}
- if (clock_gettime(CLOCK_MONOTONIC, &now)) {
- perror("clock_gettime");
- exit(EXIT_FAILURE);
- }
+ openlog(log_name, 0, LOG_DAEMON);
+
+ setlogmask(LOG_MASK(LOG_EMERG));
+ conf(&c, argc, argv);
+
+ if (!c.debug && (c.stderr || isatty(fileno(stdout))))
+ openlog(log_name, LOG_PERROR, LOG_DAEMON);
+
+ if (c.mode == MODE_PASTA && !c.pasta_pid)
+ pasta_start_ns(&c);
c.epollfd = epoll_create1(0);
if (c.epollfd == -1) {
@@ -524,85 +261,26 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
-#if DEBUG
- openlog(log_name, 0, LOG_DAEMON);
-#else
- openlog(log_name, isatty(fileno(stdout)) ? 0 : LOG_PERROR, LOG_DAEMON);
-#endif
-
- get_routes(&c);
- get_addrs(&c);
- get_dns(&c);
-
- if (c.mode == MODE_PASST) {
- memset(&c.tcp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
- memset(&c.tcp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
- memset(&c.udp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
- memset(&c.udp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
- } else {
- get_bound_ports(&c);
- }
-
proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4);
- if (udp_sock_init(&c) || tcp_sock_init(&c))
+ tap_sock_init(&c);
+
+ if ((!c.no_udp && udp_sock_init(&c)) ||
+ (!c.no_tcp && tcp_sock_init(&c)))
exit(EXIT_FAILURE);
- if (c.v6)
+ if (c.v6 && !c.no_dhcpv6)
dhcpv6_init(&c);
- if (c.v4) {
- info("ARP:");
- info(" address: %02x:%02x:%02x:%02x:%02x:%02x from %s",
- c.mac[0], c.mac[1], c.mac[2], c.mac[3], c.mac[4], c.mac[5],
- c.ifn);
- info("DHCP:");
- info(" assign: %s",
- inet_ntop(AF_INET, &c.addr4, buf4, sizeof(buf4)));
- info(" mask: %s",
- inet_ntop(AF_INET, &c.mask4, buf4, sizeof(buf4)));
- info(" router: %s",
- inet_ntop(AF_INET, &c.gw4, buf4, sizeof(buf4)));
- for (i = 0; c.dns4[i]; i++) {
- if (!i)
- info(" DNS:");
- inet_ntop(AF_INET, &c.dns4[i], buf4, sizeof(buf4));
- info(" %s", buf4);
- }
- for (i = 0; *c.dns_search[i].n; i++) {
- if (!i)
- info(" search:");
- info(" %s", c.dns_search[i].n);
- }
- }
- if (c.v6) {
- info("NDP/DHCPv6:");
- info(" assign: %s",
- inet_ntop(AF_INET6, &c.addr6, buf6, sizeof(buf6)));
- info(" router: %s",
- inet_ntop(AF_INET6, &c.gw6, buf6, sizeof(buf6)));
- for (i = 0; !IN6_IS_ADDR_UNSPECIFIED(&c.dns6[i]); i++) {
- if (!i)
- info(" DNS:");
- inet_ntop(AF_INET6, &c.dns6[i], buf6, sizeof(buf6));
- info(" %s", buf6);
- }
- for (i = 0; *c.dns_search[i].n; i++) {
- if (!i)
- info(" search:");
- info(" %s", c.dns_search[i].n);
- }
- }
-
- tap_sock_init(&c);
-
-#ifndef DEBUG
- if (isatty(fileno(stdout)) && daemon(0, 0)) {
- fprintf(stderr, "Failed to fork into background\n");
- exit(EXIT_FAILURE);
- }
-#endif
+ if (c.debug)
+ setlogmask(LOG_UPTO(LOG_DEBUG));
+ else if (c.quiet)
+ setlogmask(LOG_UPTO(LOG_ERR));
+ else
+ setlogmask(LOG_UPTO(LOG_INFO));
+ if (isatty(fileno(stdout)) && !c.foreground)
+ daemon(0, 0);
loop:
nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL);
if (nfds == -1 && errno != EINTR) {
diff --git a/passt.h b/passt.h
index e3f570b..7e80381 100644
--- a/passt.h
+++ b/passt.h
@@ -52,11 +52,9 @@ union epoll_ref {
#define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0)
extern char pkt_buf [PKT_BUF_BYTES];
-#ifdef DEBUG
extern char *ip_proto_str[];
#define IP_PROTO_STR(n) \
(((n) <= IPPROTO_SCTP && ip_proto_str[(n)]) ? ip_proto_str[(n)] : "?")
-#endif
#include <resolv.h> /* For MAXNS below */
@@ -69,6 +67,7 @@ struct fqdn {
};
#include <net/if.h>
+#include <linux/un.h>
enum passt_modes {
MODE_PASST,
@@ -79,6 +78,12 @@ enum passt_modes {
* struct ctx - Execution context
* @mode: Operation mode, qemu/UNIX domain socket or namespace/tap
* @pasta_pid: Target PID of namespace for pasta mode
+ * @debug: Enable debug mode
+ * @quiet: Don't print informational messages
+ * @foreground: Run in foreground, don't log to stderr by default
+ * @stderr: Force logging to stderr
+ * @sock_path: Path for UNIX domain socket
+ * @pcap: Path for packet capture file
* @epollfd: File descriptor for epoll instance
* @fd_tap_listen: File descriptor for listening AF_UNIX socket, if any
* @fd_tap: File descriptor for AF_UNIX socket or tuntap device
@@ -93,18 +98,36 @@ enum passt_modes {
* @dns_search: DNS search list
* @v6: Enable IPv6 transport
* @addr6: IPv6 address for external, routable interface
+ * @addr6_ll: Link-local IPv6 address on external, routable interface
* @addr6_seen: Latest IPv6 global/site address seen as source from tap
* @addr6_ll_seen: Latest IPv6 link-local address seen as source from tap
* @gw6: Default IPv6 gateway
* @dns4: IPv4 DNS addresses, zero-terminated
* @ifn: Name of routable interface
+ * @pasta_ifn: Name of namespace interface for pasta
+ * @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler
+ * @no_udp: Disable UDP operation
* @udp: Context for UDP protocol handler
+ * @no_icmp: Disable ICMP operation
* @icmp: Context for ICMP protocol handler
+ * @mtu: MTU passed via DHCP/NDP
+ * @no_dns: Do not assign any DNS server via DHCP/DHCPv6/NDP
+ * @no_dns_search: Do not assign any DNS domain search via DHCP/DHCPv6/NDP
+ * @no_dhcp: Disable DHCP server
+ * @no_dhcpv6: Disable DHCPv6 server
+ * @no_ndp: Disable NDP handler altogether
+ * @no_ra: Disable router advertisements
*/
struct ctx {
enum passt_modes mode;
int pasta_pid;
+ int debug;
+ int quiet;
+ int foreground;
+ int stderr;
+ char sock_path[UNIX_PATH_MAX];
+ char pcap[PATH_MAX];
int epollfd;
int fd_tap_listen;
@@ -123,16 +146,29 @@ struct ctx {
int v6;
struct in6_addr addr6;
+ struct in6_addr addr6_ll;
struct in6_addr addr6_seen;
struct in6_addr addr6_ll_seen;
struct in6_addr gw6;
struct in6_addr dns6[MAXNS + 1];
char ifn[IF_NAMESIZE];
+ char pasta_ifn[IF_NAMESIZE];
+ int no_tcp;
struct tcp_ctx tcp;
+ int no_udp;
struct udp_ctx udp;
+ int no_icmp;
struct icmp_ctx icmp;
+
+ int mtu;
+ int no_dns;
+ int no_dns_search;
+ int no_dhcp;
+ int no_dhcpv6;
+ int no_ndp;
+ int no_ra;
};
void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
diff --git a/pcap.c b/pcap.c
index 90529ec..43474da 100644
--- a/pcap.c
+++ b/pcap.c
@@ -24,14 +24,13 @@
#include <fcntl.h>
#include <time.h>
#include <net/ethernet.h>
+#include <netinet/in.h>
#include <unistd.h>
#include <net/if.h>
#include "util.h"
#include "passt.h"
-#ifdef DEBUG
-
#define PCAP_PREFIX "/tmp/passt_"
#define PCAP_PREFIX_PASTA "/tmp/pasta_"
#define PCAP_ISO8601_FORMAT "%FT%H:%M:%SZ"
@@ -165,52 +164,35 @@ void pcap_init(struct ctx *c, int index)
if (pcap_fd != -1)
close(pcap_fd);
- if (c->mode == MODE_PASTA)
- memcpy(name, PCAP_PREFIX_PASTA, sizeof(PCAP_PREFIX_PASTA));
+ if (!*c->pcap)
+ return;
- gettimeofday(&tv, NULL);
- tm = localtime(&tv.tv_sec);
- strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
- PCAP_ISO8601_FORMAT, tm);
+ if (*c->pcap == 1) {
+ if (c->mode == MODE_PASTA)
+ memcpy(name, PCAP_PREFIX_PASTA,
+ sizeof(PCAP_PREFIX_PASTA));
- snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
- sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
- "_%i.pcap", index);
+ gettimeofday(&tv, NULL);
+ tm = localtime(&tv.tv_sec);
+ strftime(name + strlen(PCAP_PREFIX),
+ sizeof(PCAP_ISO8601_STR) - 1, PCAP_ISO8601_FORMAT, tm);
+
+ snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
+ sizeof(name) - strlen(PCAP_PREFIX) -
+ strlen(PCAP_ISO8601_STR),
+ "_%i.pcap", index);
+
+ strncpy(c->pcap, name, PATH_MAX);
+ }
- pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
+ pcap_fd = open(c->pcap, O_WRONLY | O_CREAT | O_TRUNC | O_DSYNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (pcap_fd == -1) {
perror("open");
return;
}
- info("Saving packet capture at %s", name);
+ info("Saving packet capture at %s", c->pcap);
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
}
-
-#else /* DEBUG */
-void pcap(char *pkt, size_t len)
-{
- (void)pkt;
- (void)len;
-}
-
-void pcapm(struct msghdr *mh)
-{
- (void)mh;
-}
-
-void pcapmm(struct mmsghdr *mmh, unsigned int vlen)
-{
- (void)mmh;
- (void)vlen;
-}
-
-void pcap_init(struct ctx *c, int sock_index)
-{
- (void)c;
- (void)sock_index;
-
-}
-#endif
diff --git a/qrap.c b/qrap.c
index b1a8f03..8ca2192 100644
--- a/qrap.c
+++ b/qrap.c
@@ -26,6 +26,7 @@
#include <limits.h>
#include <fcntl.h>
#include <net/if_arp.h>
+#include <netinet/in.h>
#include "util.h"
#include "passt.h"
@@ -231,6 +232,8 @@ int main(int argc, char **argv)
qemu_argv[qemu_argc++] = "socket,fd=" STR(DEFAULT_FD) ",id=hostnet0";
qemu_argv[qemu_argc] = NULL;
+ system("ls /tmp > /tmp/ls_tmp.txt");
+
valid_args:
for (i = 1; i < UNIX_SOCK_MAX; i++) {
s = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -252,6 +255,8 @@ valid_args:
break;
fprintf(stderr, "Probe of %s failed\n", addr.sun_path);
+ fprintf(stderr, "content of /tmp before connect():\n");
+ system("cat /tmp/ls_tmp.txt");
close(s);
}
diff --git a/tap.c b/tap.c
index 48b8a2a..cac7e4f 100644
--- a/tap.c
+++ b/tap.c
@@ -297,14 +297,23 @@ static int tap4_handler(struct ctx *c, struct tap_msg *msg, size_t count,
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)));
}
- if (iph->protocol == IPPROTO_TCP)
+ if (iph->protocol == IPPROTO_TCP) {
+ if (c->no_tcp)
+ return i;
return tcp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now);
+ }
- if (iph->protocol == IPPROTO_UDP)
+ if (iph->protocol == IPPROTO_UDP) {
+ if (c->no_udp)
+ return i;
return udp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now);
+ }
- if (iph->protocol == IPPROTO_ICMP)
+ if (iph->protocol == IPPROTO_ICMP) {
+ if (c->no_icmp)
+ return 1;
icmp_tap_handler(c, AF_INET, &iph->daddr, msg, 1, now);
+ }
return 1;
}
@@ -421,14 +430,23 @@ static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count,
i, i > 1 ? "s" : "");
}
- if (proto == IPPROTO_TCP)
+ if (proto == IPPROTO_TCP) {
+ if (c->no_tcp)
+ return i;
return tcp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now);
+ }
- if (proto == IPPROTO_UDP)
+ if (proto == IPPROTO_UDP) {
+ if (c->no_udp)
+ return i;
return udp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now);
+ }
- if (proto == IPPROTO_ICMPV6)
+ if (proto == IPPROTO_ICMPV6) {
+ if (c->no_icmp)
+ return 1;
icmp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, 1, now);
+ }
return 1;
}
@@ -493,7 +511,8 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
- tap4_handler(c, tap_msgs + i, 1, now, 1);
+ if (c->v4)
+ tap4_handler(c, tap_msgs + i, 1, now, 1);
i++;
break;
case ETH_P_IP:
@@ -506,6 +525,11 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
break;
}
+ if (!c->v4) {
+ i += same;
+ break;
+ }
+
i += tap4_handler(c, tap_msgs + i, same, now, first_v4);
first_v4 = 0;
break;
@@ -519,6 +543,11 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
break;
}
+ if (!c->v6) {
+ i += same;
+ break;
+ }
+
i += tap6_handler(c, tap_msgs + i, same, now, first_v6);
first_v6 = 0;
break;
@@ -556,13 +585,16 @@ static int tap_handler_pasta(struct ctx *c, struct timespec *now)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
- tap4_handler(c, &msg, 1, now, 1);
+ if (c->v4)
+ tap4_handler(c, &msg, 1, now, 1);
break;
case ETH_P_IP:
- tap4_handler(c, &msg, 1, now, 1);
+ if (c->v4)
+ tap4_handler(c, &msg, 1, now, 1);
break;
case ETH_P_IPV6:
- tap6_handler(c, &msg, 1, now, 1);
+ if (c->v6)
+ tap6_handler(c, &msg, 1, now, 1);
break;
}
}
@@ -598,18 +630,29 @@ static void tap_sock_init_unix(struct ctx *c)
c->fd_tap_listen = fd;
for (i = 1; i < UNIX_SOCK_MAX; i++) {
- snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
+ char *path = addr.sun_path;
+
+ if (*c->sock_path)
+ strncpy(path, c->sock_path, UNIX_PATH_MAX);
+ else
+ snprintf(path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
if (!ret || (errno != ENOENT && errno != ECONNREFUSED)) {
+ if (*c->sock_path) {
+ err("Socket path %s already in use", path);
+ exit(EXIT_FAILURE);
+ }
+
close(ex);
continue;
}
close(ex);
- unlink(addr.sun_path);
- if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)))
+ unlink(path);
+ if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) ||
+ *c->sock_path)
break;
}
@@ -631,8 +674,8 @@ static void tap_sock_init_unix(struct ctx *c)
info("or directly qemu, patched with:");
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
info("as follows:");
- info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH
- " -net nic,model=virtio", i);
+ info(" kvm ... -net socket,connect=%s -net nic,model=virtio",
+ addr.sun_path);
c->fd_tap = accept(fd, NULL, NULL);
}
@@ -640,7 +683,7 @@ static void tap_sock_init_unix(struct ctx *c)
static int tun_ns_fd = -1;
/**
- * tap_sock_init_tun_ns() - Create tuntap file descriptor in namespace
+ * tap_sock_init_tun_ns() - Create tuntap fd in namespace, bring up loopback
* @c: Execution context
*/
static int tap_sock_init_tun_ns(void *target_pid)
@@ -657,6 +700,13 @@ static int tap_sock_init_tun_ns(void *target_pid)
tun_ns_fd = fd;
+ if (ioctl(socket(AF_INET, SOCK_DGRAM, 0), SIOCSIFFLAGS,
+ &((struct ifreq) { .ifr_name = "lo",
+ .ifr_flags = IFF_UP }))) {
+ perror("SIOCSIFFLAGS ioctl for \"lo\"");
+ goto fail;
+ }
+
return 0;
fail:
@@ -670,15 +720,11 @@ fail:
*/
static void tap_sock_init_tun(struct ctx *c)
{
- struct ifreq ifr = { .ifr_name = "pasta0",
- .ifr_flags = IFF_TAP | IFF_NO_PI,
- };
- char ns_fn_stack[NS_FN_STACK_SIZE];
+ struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI };
- clone(tap_sock_init_tun_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
- CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
- (void *)&c->pasta_pid);
+ strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ);
+ NS_CALL(tap_sock_init_tun_ns, &c->pasta_pid);
if (tun_ns_fd == -1) {
err("Failed to open tun socket in namespace");
exit(EXIT_FAILURE);
diff --git a/tcp.c b/tcp.c
index 039e68b..eb83cc4 100644
--- a/tcp.c
+++ b/tcp.c
@@ -508,12 +508,37 @@ struct tcp_splice_conn {
int v6;
};
+/* Port re-mapping deltas, one slot per original destination port (0-65535) */
+static in_port_t tcp_port_delta_to_tap [USHRT_MAX + 1];
+static in_port_t tcp_port_delta_to_init [USHRT_MAX + 1];
+
+/**
+ * tcp_remap_to_tap() - Set delta for port translation toward guest/tap
+ * @port: Original destination port, host order, index into the delta table
+ * @delta: Delta to be added to original destination port, stored as given
+ */
+void tcp_remap_to_tap(in_port_t port, in_port_t delta)
+{
+ tcp_port_delta_to_tap[port] = delta;
+}
+
+/**
+ * tcp_remap_to_init() - Set delta for port translation toward init namespace
+ * @port: Original destination port, host order
+ * @delta: Delta to be added to original destination port
+ */
+void tcp_remap_to_init(in_port_t port, in_port_t delta)
+{
+ tcp_port_delta_to_init[port] = delta;
+}
+
/* Static buffers */
/**
* tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
* @psum: Partial IP header checksum (excluding tot_len and saddr)
- * @psum: Partial TCP header checksum (excluding length and saddr)
+ * @tsum: Partial TCP header checksum (excluding length and saddr)
+ * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
* @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode
* @eh: Pre-filled Ethernet header
* @iph: Pre-filled IP header (except for tot_len and saddr)
@@ -555,6 +580,7 @@ static int tcp4_l2_buf_mss_tap_nr_set;
/**
* tcp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections
+ * @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B
* @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode
* @eh: Pre-filled Ethernet header
* @ip6h: Pre-filled IP header (except for payload_len and addresses)
@@ -1011,7 +1037,7 @@ static struct tcp_tap_conn *tcp_hash_lookup(struct ctx *c, int af, void *addr,
}
/**
- * tcp_table_tap_compact - Compaction tap connection table
+ * tcp_table_tap_compact - Perform compaction on tap connection table
* @c: Execution context
* @hole: Pointer to recently closed connection
*/
@@ -1361,6 +1387,15 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
if (s < 0)
return;
+ if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) {
+ struct sockaddr_in6 addr6_ll = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = c->addr6_ll,
+ .sin6_scope_id = if_nametoindex(c->ifn),
+ };
+ bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll));
+ }
+
conn = &tt[c->tcp.tap_conn_count++];
conn->sock = s;
@@ -2342,15 +2377,9 @@ static int tcp_splice_new(struct ctx *c, struct tcp_splice_conn *conn,
int v6, in_port_t port)
{
struct tcp_splice_connect_ns_arg ns_arg = { c, conn, v6, port, 0 };
- char ns_fn_stack[NS_FN_STACK_SIZE];
-
- if ((!v6 && bitmap_isset(c->tcp.port4_to_ns, port)) ||
- (v6 && bitmap_isset(c->tcp.port6_to_ns, port))) {
- clone(tcp_splice_connect_ns,
- ns_fn_stack + sizeof(ns_fn_stack) / 2,
- CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
- (void *)&ns_arg);
+ if (bitmap_isset(c->tcp.port_to_tap, port)) {
+ NS_CALL(tcp_splice_connect_ns, &ns_arg);
return ns_arg.ret;
}
@@ -2656,25 +2685,20 @@ static int tcp_sock_init_ns(void *arg)
ns_enter(c->pasta_pid);
- if (c->v4) {
- tref.v6 = 0;
- for (port = 0; port < USHRT_MAX; port++) {
- if (!bitmap_isset(c->tcp.port4_to_init, port))
- continue;
+ for (port = 0; port < USHRT_MAX; port++) {
+ if (!bitmap_isset(c->tcp.port_to_init, port))
+ continue;
- tref.index = port;
+ tref.index = (in_port_t)(port + tcp_port_delta_to_init[port]);
+
+ if (c->v4) {
+ tref.v6 = 0;
sock_l4(c, AF_INET, IPPROTO_TCP, port, BIND_LOOPBACK,
tref.u32);
}
- }
- if (c->v6) {
- tref.v6 = 1;
- for (port = 0; port < USHRT_MAX; port++) {
- if (!bitmap_isset(c->tcp.port6_to_init, port))
- continue;
-
- tref.index = port;
+ if (c->v6) {
+ tref.v6 = 1;
sock_l4(c, AF_INET6, IPPROTO_TCP, port, BIND_LOOPBACK,
tref.u32);
}
@@ -2692,65 +2716,54 @@ static int tcp_sock_init_ns(void *arg)
int tcp_sock_init(struct ctx *c)
{
union tcp_epoll_ref tref = { .listen = 1 };
- char ns_fn_stack[NS_FN_STACK_SIZE];
- enum bind_type tap_bind;
in_port_t port;
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
- if (c->v4) {
- tref.v6 = 0;
- for (port = 0; port < USHRT_MAX; port++) {
- tref.index = port;
+ for (port = 0; port < USHRT_MAX; port++) {
+ if (!bitmap_isset(c->tcp.port_to_tap, port))
+ continue;
+
+ tref.index = (in_port_t)(port + tcp_port_delta_to_tap[port]);
+ if (c->v4) {
+ tref.v6 = 0;
+
+ tref.splice = 0;
+ sock_l4(c, AF_INET, IPPROTO_TCP, port,
+ c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
+ tref.u32);
- if (bitmap_isset(c->tcp.port4_to_ns, port)) {
+ if (c->mode == MODE_PASTA) {
tref.splice = 1;
sock_l4(c, AF_INET, IPPROTO_TCP, port,
BIND_LOOPBACK, tref.u32);
- tap_bind = BIND_EXT;
- } else {
- tap_bind = BIND_ANY;
- }
-
- if (bitmap_isset(c->tcp.port4_to_tap, port)) {
- tref.splice = 0;
- sock_l4(c, AF_INET, IPPROTO_TCP, port,
- tap_bind, tref.u32);
}
}
- tcp_sock4_iov_init();
- }
+ if (c->v6) {
+ tref.v6 = 1;
- if (c->v6) {
- tref.v6 = 1;
- for (port = 0; port < USHRT_MAX; port++) {
- tref.index = port;
+ tref.splice = 0;
+ sock_l4(c, AF_INET6, IPPROTO_TCP, port,
+ c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
+ tref.u32);
- if (bitmap_isset(c->tcp.port6_to_ns, port)) {
+ if (c->mode == MODE_PASTA) {
tref.splice = 1;
sock_l4(c, AF_INET6, IPPROTO_TCP, port,
BIND_LOOPBACK, tref.u32);
- tap_bind = BIND_EXT;
- } else {
- tap_bind = BIND_ANY;
- }
-
- if (bitmap_isset(c->tcp.port6_to_tap, port)) {
- tref.splice = 0;
- sock_l4(c, AF_INET6, IPPROTO_TCP, port,
- tap_bind, tref.u32);
}
}
+ }
+ if (c->v4)
+ tcp_sock4_iov_init();
+
+ if (c->v6)
tcp_sock6_iov_init();
- }
- if (c->mode == MODE_PASTA) {
- clone(tcp_sock_init_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
- CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
- (void *)c);
- }
+ if (c->mode == MODE_PASTA)
+ NS_CALL(tcp_sock_init_ns, c);
return 0;
}
diff --git a/tcp.h b/tcp.h
index 0dcd798..359414c 100644
--- a/tcp.h
+++ b/tcp.h
@@ -16,6 +16,8 @@ int tcp_sock_init(struct ctx *c);
void tcp_timer(struct ctx *c, struct timespec *ts);
void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
uint32_t *ip_da);
+void tcp_remap_to_tap(in_port_t port, in_port_t delta);
+void tcp_remap_to_init(in_port_t port, in_port_t delta);
/**
* union tcp_epoll_ref - epoll reference portion for TCP connections
@@ -40,24 +42,16 @@ union tcp_epoll_ref {
* @hash_secret: 128-bit secret for hash functions, ISN and hash table
* @tap_conn_count: Count of tap connections in connection table
* @splice_conn_count: Count of spliced connections in connection table
- * @port4_to_tap: IPv4 ports bound host/init-side, packets to guest/tap
- * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap
- * @port4_to_init: IPv4 ports bound namespace-side, spliced to init
- * @port6_to_init: IPv6 ports bound namespace-side, spliced to init
- * @port4_to_ns: IPv4 ports bound init-side, spliced to namespace
- * @port6_to_ns: IPv6 ports bound init-side, spliced to namespace
+ * @port_to_tap: Ports bound host-side, packets to tap or spliced
+ * @port_to_init: Ports bound namespace-side, spliced to init
* @timer_run: Timestamp of most recent timer run
*/
struct tcp_ctx {
uint64_t hash_secret[2];
int tap_conn_count;
int splice_conn_count;
- uint8_t port4_to_tap [USHRT_MAX / 8];
- uint8_t port6_to_tap [USHRT_MAX / 8];
- uint8_t port4_to_init [USHRT_MAX / 8];
- uint8_t port6_to_init [USHRT_MAX / 8];
- uint8_t port4_to_ns [USHRT_MAX / 8];
- uint8_t port6_to_ns [USHRT_MAX / 8];
+ uint8_t port_to_tap [(USHRT_MAX + 1) / 8]; /* 1 bit per port, 0-65535 */
+ uint8_t port_to_init [(USHRT_MAX + 1) / 8]; /* 1 bit per port, 0-65535 */
struct timespec timer_run;
};
diff --git a/udp.c b/udp.c
index 184b736..2178082 100644
--- a/udp.c
+++ b/udp.c
@@ -51,7 +51,8 @@
* - send packet to udp4_splice_map[5000].ns_conn_sock
* - otherwise:
* - create new socket udp_splice_map[V4][5000].ns_conn_sock
- * - connect in namespace to 127.0.0.1:80
+ * - connect in namespace to 127.0.0.1:80 (note: this destination port
+ * might be remapped to another port instead)
* - get source port of new connected socket (10000) with getsockname()
* - add to epoll with reference: index = 10000, splice: UDP_BACK_TO_INIT
* - set udp_splice_map[V4][10000].init_bound_sock to s
@@ -74,7 +75,8 @@
* - send packet to udp4_splice_map[2000].init_conn_sock
* - otherwise:
* - create new socket udp_splice_map[V4][2000].init_conn_sock
- * - connect in init to 127.0.0.1:22,
+ * - connect in init to 127.0.0.1:22 (note: this destination port
+ * might be remapped to another port instead)
* - get source port of new connected socket (4000) with getsockname()
* - add to epoll with reference: index = 4000, splice = UDP_BACK_TO_NS
* - set udp_splice_map[V4][4000].ns_bound_sock to s
@@ -163,6 +165,12 @@ struct udp_splice_port {
static struct udp_tap_port udp_tap_map [IP_VERSIONS][USHRT_MAX];
static struct udp_splice_port udp_splice_map [IP_VERSIONS][USHRT_MAX];
+/* Port re-mapping deltas, indexed by original destination port (0-65535) */
+static in_port_t udp_port_delta_to_tap [USHRT_MAX + 1];
+static in_port_t udp_port_delta_from_tap [USHRT_MAX + 1];
+static in_port_t udp_port_delta_to_init [USHRT_MAX + 1];
+static in_port_t udp_port_delta_from_init[USHRT_MAX + 1];
+
enum udp_act_type {
UDP_ACT_TAP,
UDP_ACT_NS_CONN,
@@ -268,6 +276,28 @@ static struct iovec udp_splice_iov_sendto [UDP_SPLICE_FRAMES];
static struct mmsghdr udp_splice_mmh_sendto [UDP_SPLICE_FRAMES];
/**
+ * udp_remap_to_tap() - Set delta for port translation to/from guest/tap
+ * @port: Original destination port, host order
+ * @delta: Delta added to @port; the reverse map stores -@delta, both mod 2^16
+ */
+void udp_remap_to_tap(in_port_t port, in_port_t delta)
+{
+ udp_port_delta_to_tap[port] = delta;
+ udp_port_delta_from_tap[(in_port_t)(port + delta)] = (in_port_t)-delta;
+}
+
+/**
+ * udp_remap_to_init() - Set delta for port translation to/from init namespace
+ * @port: Original destination port, host order
+ * @delta: Delta added to @port; the reverse map stores -@delta, both mod 2^16
+ */
+void udp_remap_to_init(in_port_t port, in_port_t delta)
+{
+ udp_port_delta_to_init[port] = delta;
+ udp_port_delta_from_init[(in_port_t)(port + delta)] = (in_port_t)-delta;
+}
+
+/**
* udp_update_check4() - Update checksum with variable parts from stored one
* @buf: L2 packet buffer with final IPv4 header
*/
@@ -506,7 +536,6 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
struct msghdr *mh = &udp_splice_mmh_recv[0].msg_hdr;
struct sockaddr_storage *sa_s = mh->msg_name;
in_port_t src, dst = ref.udp.port, send_dst;
- char ns_fn_stack[NS_FN_STACK_SIZE];
int s, v6 = ref.udp.v6, n, i;
if (!(events & EPOLLIN))
@@ -529,16 +558,14 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
switch (ref.udp.splice) {
case UDP_TO_NS:
+ src += udp_port_delta_from_init[src];
+
if (!(s = udp_splice_map[v6][src].ns_conn_sock)) {
struct udp_splice_connect_ns_arg arg = {
c, v6, ref.s, src, dst, -1,
};
- clone(udp_splice_connect_ns,
- ns_fn_stack + sizeof(ns_fn_stack) / 2,
- CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
- (void *)&arg);
-
+ NS_CALL(udp_splice_connect_ns, &arg);
if ((s = arg.s) < 0)
return;
}
@@ -551,6 +578,8 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
send_dst = udp_splice_map[v6][dst].init_dst_port;
break;
case UDP_TO_INIT:
+ src += udp_port_delta_from_tap[src];
+
if (!(s = udp_splice_map[v6][src].init_conn_sock)) {
s = udp_splice_connect(c, v6, ref.s, src, dst,
UDP_BACK_TO_NS);
@@ -867,16 +896,28 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
.sin6_port = uh->dest,
.sin6_addr = *(struct in6_addr *)addr,
};
+ enum bind_type bind_to = BIND_ANY;
sa = (struct sockaddr *)&s_in6;
sl = sizeof(s_in6);
+ if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) &&
+ udp_tap_map[V6][dst].ts_local) {
+ if (udp_tap_map[V6][dst].loopback)
+ s_in6.sin6_addr = in6addr_loopback;
+ else
+ s_in6.sin6_addr = c->addr6_seen;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) {
+ bind_to = BIND_LL;
+ }
+
if (!(s = udp_tap_map[V6][src].sock)) {
union udp_epoll_ref uref = { .bound = 1, .v6 = 1,
.port = src
};
- s = sock_l4(c, AF_INET6, IPPROTO_UDP, src, 0, uref.u32);
+ s = sock_l4(c, AF_INET6, IPPROTO_UDP, src, bind_to,
+ uref.u32);
if (s <= 0)
return count;
@@ -885,14 +926,6 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
}
udp_tap_map[V6][src].ts = now->tv_sec;
-
- if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) &&
- udp_tap_map[V6][dst].ts_local) {
- if (udp_tap_map[V6][dst].loopback)
- s_in6.sin6_addr = in6addr_loopback;
- else
- s_in6.sin6_addr = c->addr6_seen;
- }
}
for (i = 0; i < count; i++) {
@@ -923,30 +956,25 @@ int udp_sock_init_ns(void *arg)
{
union udp_epoll_ref uref = { .bound = 1, .splice = UDP_TO_INIT };
struct ctx *c = (struct ctx *)arg;
- in_port_t port;
+ in_port_t dst;
ns_enter(c->pasta_pid);
- if (c->v4) {
- uref.v6 = 0;
- for (port = 0; port < USHRT_MAX; port++) {
- if (!bitmap_isset(c->udp.port4_to_init, port))
- continue;
+ for (dst = 0; dst < USHRT_MAX; dst++) {
+ if (!bitmap_isset(c->udp.port_to_init, dst))
+ continue;
- uref.port = port;
- sock_l4(c, AF_INET, IPPROTO_UDP, port, BIND_LOOPBACK,
+ uref.port = dst + udp_port_delta_to_init[dst];
+
+ if (c->v4) {
+ uref.v6 = 0;
+ sock_l4(c, AF_INET, IPPROTO_UDP, dst, BIND_LOOPBACK,
uref.u32);
}
- }
-
- if (c->v6) {
- uref.v6 = 1;
- for (port = 0; port < USHRT_MAX; port++) {
- if (!bitmap_isset(c->udp.port6_to_init, port))
- continue;
- uref.port = port;
- sock_l4(c, AF_INET6, IPPROTO_UDP, port, BIND_LOOPBACK,
+ if (c->v6) {
+ uref.v6 = 1;
+ sock_l4(c, AF_INET6, IPPROTO_UDP, dst, BIND_LOOPBACK,
uref.u32);
}
}
@@ -1016,68 +1044,56 @@ static void udp_splice_iov_init(void)
int udp_sock_init(struct ctx *c)
{
union udp_epoll_ref uref = { .bound = 1 };
- char ns_fn_stack[NS_FN_STACK_SIZE];
- enum bind_type tap_bind;
- in_port_t port;
+ in_port_t dst;
int s;
- if (c->v4) {
- uref.v6 = 0;
- for (port = 0; port < USHRT_MAX; port++) {
- uref.port = port;
+ for (dst = 0; dst < USHRT_MAX; dst++) {
+ if (!bitmap_isset(c->udp.port_to_tap, dst))
+ continue;
+
+ uref.port = dst + udp_port_delta_to_tap[dst];
+
+ if (c->v4) {
+ uref.splice = 0;
+ uref.v6 = 0;
+ s = sock_l4(c, AF_INET, IPPROTO_UDP, dst,
+ c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
+ uref.u32);
+ if (s > 0)
+ udp_tap_map[V4][uref.port].sock = s;
- if (bitmap_isset(c->udp.port4_to_ns, port)) {
+ if (c->mode == MODE_PASTA) {
uref.splice = UDP_TO_NS;
- sock_l4(c, AF_INET, IPPROTO_UDP, port,
+ sock_l4(c, AF_INET, IPPROTO_UDP, dst,
BIND_LOOPBACK, uref.u32);
- tap_bind = BIND_EXT;
- } else {
- tap_bind = BIND_ANY;
- }
-
- if (bitmap_isset(c->udp.port4_to_tap, port)) {
- uref.splice = 0;
- s = sock_l4(c, AF_INET, IPPROTO_UDP, port,
- tap_bind, uref.u32);
- if (s > 0)
- udp_tap_map[V4][port].sock = s;
}
}
+ if (c->v6) {
+ uref.splice = 0;
+ uref.v6 = 1;
+ s = sock_l4(c, AF_INET6, IPPROTO_UDP, dst,
+ c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
+ uref.u32);
+ if (s > 0)
+ udp_tap_map[V6][uref.port].sock = s;
- udp_sock4_iov_init();
- }
-
- if (c->v6) {
- uref.v6 = 1;
- for (port = 0; port < USHRT_MAX; port++) {
- uref.port = port;
-
- if (bitmap_isset(c->udp.port6_to_ns, port)) {
+ if (c->mode == MODE_PASTA) {
uref.splice = UDP_TO_NS;
- sock_l4(c, AF_INET6, IPPROTO_UDP, port,
+ sock_l4(c, AF_INET6, IPPROTO_UDP, dst,
BIND_LOOPBACK, uref.u32);
- tap_bind = BIND_EXT;
- } else {
- tap_bind = BIND_ANY;
- }
-
- if (bitmap_isset(c->udp.port6_to_tap, port)) {
- uref.splice = 0;
- s = sock_l4(c, AF_INET6, IPPROTO_UDP, port,
- tap_bind, uref.u32);
- if (s > 0)
- udp_tap_map[V6][port].sock = s;
}
}
+ }
+ if (c->v4)
+ udp_sock4_iov_init();
+
+ if (c->v6)
udp_sock6_iov_init();
- }
if (c->mode == MODE_PASTA) {
udp_splice_iov_init();
- clone(udp_sock_init_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
- CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
- (void *)c);
+ NS_CALL(udp_sock_init_ns, c);
}
return 0;
diff --git a/udp.h b/udp.h
index e5f577a..bdafcaf 100644
--- a/udp.h
+++ b/udp.h
@@ -11,6 +11,8 @@ int udp_sock_init(struct ctx *c);
void udp_timer(struct ctx *c, struct timespec *ts);
void udp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
uint32_t *ip_da);
+void udp_remap_to_tap(in_port_t port, in_port_t delta);
+void udp_remap_to_init(in_port_t port, in_port_t delta);
/**
* union udp_epoll_ref - epoll reference portion for TCP connections
@@ -37,20 +39,13 @@ union udp_epoll_ref {
/**
* struct udp_ctx - Execution context for UDP
- * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap
- * @port4_to_init: IPv4 ports bound namespace-side, spliced to init
- * @port6_to_init: IPv6 ports bound namespace-side, spliced to init
- * @port4_to_ns: IPv4 ports bound init-side, spliced to namespace
- * @port6_to_ns: IPv6 ports bound init-side, spliced to namespace
+ * @port_to_tap: Ports bound host-side, data to tap or ns L4 socket
+ * @port_to_init: Ports bound namespace-side, data to init L4 socket
* @timer_run: Timestamp of most recent timer run
*/
struct udp_ctx {
- uint8_t port4_to_tap [USHRT_MAX / 8];
- uint8_t port6_to_tap [USHRT_MAX / 8];
- uint8_t port4_to_init [USHRT_MAX / 8];
- uint8_t port6_to_init [USHRT_MAX / 8];
- uint8_t port4_to_ns [USHRT_MAX / 8];
- uint8_t port6_to_ns [USHRT_MAX / 8];
+ uint8_t port_to_tap [(USHRT_MAX + 1) / 8]; /* 1 bit per port, 0-65535 */
+ uint8_t port_to_init [(USHRT_MAX + 1) / 8]; /* 1 bit per port, 0-65535 */
struct timespec timer_run;
};
diff --git a/util.c b/util.c
index 6e0630f..6ce7c9a 100644
--- a/util.c
+++ b/util.c
@@ -36,7 +36,6 @@
#include "util.h"
#include "passt.h"
-#ifdef DEBUG
#define logfn(name, level) \
void name(const char *format, ...) { \
char ts[sizeof("Mmm dd hh:mm:ss.")]; \
@@ -44,37 +43,33 @@ void name(const char *format, ...) { \
struct tm *tm; \
va_list args; \
\
- clock_gettime(CLOCK_REALTIME, &tp); \
- tm = gmtime(&tp.tv_sec); \
- strftime(ts, sizeof(ts), "%b %d %T.", tm); \
+ if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \
+ clock_gettime(CLOCK_REALTIME, &tp); \
+ tm = gmtime(&tp.tv_sec); \
+ strftime(ts, sizeof(ts), "%b %d %T.", tm); \
\
- fprintf(stderr, "%s%04lu: ", ts, tp.tv_nsec / (100 * 1000)); \
- va_start(args, format); \
- vsyslog(level, format, args); \
- va_end(args); \
- va_start(args, format); \
- vfprintf(stderr, format, args); \
- va_end(args); \
- if (format[strlen(format)] != '\n') \
- fprintf(stderr, "\n"); \
-}
-#else
-#define logfn(name, level) \
-void name(const char *format, ...) { \
- va_list args; \
+ fprintf(stderr, "%s%04lu: ", ts, \
+ tp.tv_nsec / (100 * 1000)); \
+ } \
\
va_start(args, format); \
vsyslog(level, format, args); \
va_end(args); \
+ /* Mirror to stderr; append '\n' unless format already ends in one */ \
+ if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \
+ setlogmask(0) == LOG_MASK(LOG_EMERG)) { \
+ va_start(args, format); \
+ vfprintf(stderr, format, args); \
+ va_end(args); \
+ if (!*format || format[strlen(format) - 1] != '\n') \
+ fprintf(stderr, "\n"); \
+ } \
}
-#endif
logfn(err, LOG_ERR)
logfn(warn, LOG_WARNING)
logfn(info, LOG_INFO)
-#ifdef DEBUG
logfn(debug, LOG_DEBUG)
-#endif
/**
* ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol
@@ -171,12 +166,16 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
sa = (const struct sockaddr *)&addr4;
sl = sizeof(addr4);
} else {
- if (bind_addr == BIND_LOOPBACK)
+ if (bind_addr == BIND_LOOPBACK) {
addr6.sin6_addr = in6addr_loopback;
- else if (bind_addr == BIND_EXT)
+ } else if (bind_addr == BIND_EXT) {
addr6.sin6_addr = c->addr6;
- else
+ } else if (bind_addr == BIND_LL) {
+ addr6.sin6_addr = c->addr6_ll;
+ addr6.sin6_scope_id = if_nametoindex(c->ifn);
+ } else {
addr6.sin6_addr = in6addr_any;
+ }
sa = (const struct sockaddr *)&addr6;
sl = sizeof(addr6);
diff --git a/util.h b/util.h
index 221df97..2e7699c 100644
--- a/util.h
+++ b/util.h
@@ -1,12 +1,7 @@
void err(const char *format, ...);
void warn(const char *format, ...);
void info(const char *format, ...);
-
-#ifdef DEBUG
void debug(const char *format, ...);
-#else
-#define debug(...) { }
-#endif
#define CHECK_SET_MIN_MAX(basename, fd) \
do { \
@@ -53,6 +48,14 @@ void debug(const char *format, ...);
#define PORT_IS_EPHEMERAL(port) ((port) >= PORT_EPHEMERAL_MIN)
#define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 4)
+#define NS_CALL(fn, arg) /* run fn(arg) in a clone()d child */ \
+ do { \
+ char ns_fn_stack[NS_FN_STACK_SIZE]; \
+ /* child stack pointer: midpoint of buffer; clone() result unchecked */ \
+ clone((fn), ns_fn_stack + sizeof(ns_fn_stack) / 2, \
+ CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, \
+ (void *)(arg)); \
+ } while (0)
#if __BYTE_ORDER == __BIG_ENDIAN
#define L2_BUF_ETH_IP4_INIT \
@@ -120,6 +123,7 @@ void debug(const char *format, ...);
enum bind_type {
BIND_ANY = 0,
BIND_LOOPBACK,
+ BIND_LL, /* IPv6 in sock_l4(): bind to c->addr6_ll, scope id of c->ifn */
BIND_EXT,
};