diff options
-rw-r--r-- | Makefile | 17 | ||||
-rw-r--r-- | conf.c | 1163 | ||||
-rw-r--r-- | conf.h | 1 | ||||
-rw-r--r-- | dhcp.c | 9 | ||||
-rw-r--r-- | dhcpv6.c | 3 | ||||
-rw-r--r-- | ndp.c | 14 | ||||
-rw-r--r-- | passt.c | 524 | ||||
-rw-r--r-- | passt.h | 40 | ||||
-rw-r--r-- | pcap.c | 60 | ||||
-rw-r--r-- | qrap.c | 5 | ||||
-rw-r--r-- | tap.c | 92 | ||||
-rw-r--r-- | tcp.c | 137 | ||||
-rw-r--r-- | tcp.h | 18 | ||||
-rw-r--r-- | udp.c | 174 | ||||
-rw-r--r-- | udp.h | 17 | ||||
-rw-r--r-- | util.c | 47 | ||||
-rw-r--r-- | util.h | 14 |
17 files changed, 1644 insertions, 691 deletions
@@ -6,20 +6,15 @@ all: passt pasta passt4netns qrap avx2: CFLAGS += -Ofast -mavx2 -ftree-vectorize -funroll-loops avx2: clean all -avx2_debug: CFLAGS += -Ofast -mavx2 -ftree-vectorize -funroll-loops -DDEBUG -g -avx2_debug: clean all - static: CFLAGS += -static static: clean all -debug: CFLAGS += -static -DDEBUG -g -debug: clean all - -passt: passt.c passt.h arp.c arp.h checksum.c checksum.h dhcp.c dhcp.h \ - dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h siphash.c siphash.h \ - tap.c tap.h icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h - $(CC) $(CFLAGS) passt.c arp.c checksum.c dhcp.c dhcpv6.c pcap.c ndp.c \ - siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt +passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \ + dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \ + siphash.c siphash.h tap.c tap.h icmp.c icmp.h tcp.c tcp.h \ + udp.c udp.h util.c util.h + $(CC) $(CFLAGS) passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c \ + pcap.c ndp.c siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt pasta: passt ln -s passt pasta @@ -0,0 +1,1163 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * conf.c - Configuration settings and option parsing + * + * Copyright (c) 2020-2021 Red Hat GmbH + * Author: Stefano Brivio <sbrivio@redhat.com> + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <getopt.h> +#include <string.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <ifaddrs.h> +#include <limits.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <syslog.h> +#include <time.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <linux/if_ether.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include "util.h" +#include "passt.h" +#include "udp.h" +#include "tcp.h" + +/** + * struct get_bound_ports_ns_arg - Arguments for get_bound_ports_ns() + * @c: Execution context + * @proto: Protocol number (IPPROTO_TCP or IPPROTO_UDP) + */ +struct get_bound_ports_ns_arg { + struct ctx *c; + uint8_t proto; +}; + +/** + * get_bound_ports_ns() - Get maps of ports namespace with bound sockets + * @arg: See struct get_bound_ports_ns_arg + * + * Return: 0 + */ +static int get_bound_ports_ns(void *arg) +{ + struct get_bound_ports_ns_arg *a = (struct get_bound_ports_ns_arg *)arg; + struct ctx *c = a->c; + + if (!c->pasta_pid || ns_enter(c->pasta_pid)) + return 0; + + if (a->proto == IPPROTO_UDP) { + procfs_scan_listen("udp", c->udp.port_to_tap); + procfs_scan_listen("udp6", c->udp.port_to_tap); + + procfs_scan_listen("tcp", c->udp.port_to_tap); + procfs_scan_listen("tcp6", c->udp.port_to_tap); + } else if (a->proto == IPPROTO_TCP) { + procfs_scan_listen("tcp", c->tcp.port_to_tap); + procfs_scan_listen("tcp6", c->tcp.port_to_tap); + } + + return 0; +} + +/** + * get_bound_ports() - Get maps of ports in init namespace with bound sockets + * @c: Execution context + * @proto: Protocol number (IPPROTO_TCP or IPPROTO_UDP) + */ +static void get_bound_ports(struct ctx *c, uint8_t proto) +{ + if (proto == IPPROTO_UDP) { + procfs_scan_listen("udp", c->udp.port_to_init); + procfs_scan_listen("udp6", c->udp.port_to_init); + + procfs_scan_listen("tcp", c->udp.port_to_init); + procfs_scan_listen("tcp6", c->udp.port_to_init); + } else if (proto == IPPROTO_TCP) { + procfs_scan_listen("tcp", c->tcp.port_to_init); + procfs_scan_listen("tcp6", c->tcp.port_to_init); + } +} + +enum conf_port_type { + PORT_SPEC = 1, + PORT_NONE, + PORT_AUTO, + PORT_ALL, +}; + +static int conf_ports(struct ctx *c, char optname, const char *optarg, + enum conf_port_type *set) +{ + int start_src = -1, end_src = -1, start_dst = -1, end_dst = -1; + void (*remap)(in_port_t port, in_port_t delta); + const char *p; + uint8_t *map; + char *sep; + + if (optname == 't') { + map = c->tcp.port_to_tap; + remap = tcp_remap_to_tap; + } else if (optname == 'T') { + map = c->tcp.port_to_init; + remap = tcp_remap_to_init; + } else if (optname == 'u') { + map = c->udp.port_to_tap; + remap = udp_remap_to_tap; + } else if (optname == 'U') { + map = c->udp.port_to_init; + remap = udp_remap_to_init; + } else { /* For gcc -O3 */ + return 0; + } + + if (!strcmp(optarg, "none")) { + if (*set) + return -EINVAL; + *set = PORT_NONE; + return 0; + } + + if (!strcmp(optarg, "auto")) { + if (*set || c->mode != MODE_PASTA) + return -EINVAL; + *set = PORT_AUTO; + return 0; + } + + if (!strcmp(optarg, "all")) { + if (*set || c->mode != MODE_PASST) + return -EINVAL; + *set = PORT_ALL; + memset(map, 0xff, PORT_EPHEMERAL_MIN / 8); + return 0; + } + + if (*set > PORT_SPEC) + return -EINVAL; + + *set = PORT_SPEC; + + if (strspn(optarg, "0123456789-,:") != strlen(optarg)) { + err("Invalid port specifier %s", optarg); + return -EINVAL; + } + + p = optarg; + do { + int i, port; + + port = strtol(p, &sep, 10); + if (sep == p) + break; + + if (port < 0 || port > USHRT_MAX || errno) + goto bad; + + /* -p 22 + * ^ start_src end_src == start_dst == end_dst == -1 + * + * -p 22-25 + * | ^ end_src + * ` start_src start_dst == end_dst == -1 + * + * -p 80:8080 + * | ^ start_dst + * ` start_src end_src == end_dst == -1 + * + * -p 22-80:8022-8080 + * | | | ^ end_dst + * | | ` start_dst + * | ` end_dst + * ` start_src + */ + switch (*sep) { + case '-': + if (start_src == -1) { /* 22-... */ + start_src = port; + } else { + if (!end_src) /* 22:8022-8080 */ + goto bad; + start_dst = port; /* 22-80:8022-... */ + } + break; + case ':': + if (start_src == -1) /* 80:... */ + start_src = end_src = port; + else if (end_src == -1) /* 22-80:... */ + end_src = port; + else /* 22-80:8022:... */ + goto bad; + break; + case ',': + case 0: + if (start_src == -1) /* 80 */ + start_src = end_src = port; + else if (end_src == -1) /* 22-25 */ + end_src = port; + else if (start_dst == -1) /* 80:8080 */ + start_dst = end_dst = port; + else if (end_dst == -1) /* 22-80:8022-8080 */ + end_dst = port; + else + goto bad; + + if (start_src > end_src) /* 80-22 */ + goto bad; + + if (start_dst > end_dst) /* 22-80:8080:8022 */ + goto bad; + + if (end_dst != -1 && + end_dst - start_dst != end_src - start_src) + goto bad; /* 22-81:8022:8080 */ + + for (i = start_src; i <= end_src; i++) { + if (bitmap_isset(map, i)) + goto overlap; + + bitmap_set(map, i); + + if (start_dst == -1) /* 22 or 22-80 */ + continue; + + /* 80:8080 or 22-80:8080:8080 */ + remap(i, (in_port_t)(start_dst - start_src)); + } + + start_src = end_src = start_dst = end_dst = -1; + break; + } + p = sep + 1; + } while (*sep); + + return 0; +bad: + err("Invalid port specifier %s", optarg); + return -EINVAL; + +overlap: + err("Overlapping port specifier %s", optarg); + return -EINVAL; +} + +/** + * struct nl_request - Netlink request filled and sent by get_routes() + * @nlh: Netlink message header + * @rtm: Routing Netlink message + */ +struct nl_request { + struct nlmsghdr nlh; + struct rtmsg rtm; +}; + +/** + * get_routes() - Get default route and fill in routable interface name + * @c: Execution context + */ +static void get_routes(struct ctx *c) +{ + struct nl_request req = { + .nlh.nlmsg_type = RTM_GETROUTE, + .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL, + .nlh.nlmsg_len = sizeof(struct nl_request), + .nlh.nlmsg_seq = 1, + .rtm.rtm_family = AF_INET, + .rtm.rtm_table = RT_TABLE_MAIN, + .rtm.rtm_scope = RT_SCOPE_UNIVERSE, + .rtm.rtm_type = RTN_UNICAST, + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + struct nlmsghdr *nlh; + int s, n, na, v4, v6; + char ifn[IFNAMSIZ]; + struct rtattr *rta; + struct rtmsg *rtm; + char buf[BUFSIZ]; + + if (!c->v4 && !c->v6) + v4 = v6 = -1; + else + v6 = -!(v4 = -c->v4); + + s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (s < 0) { + perror("netlink socket"); + goto out; + } + + if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + perror("netlink bind"); + goto out; + } + +v6: + if (send(s, &req, sizeof(req), 0) < 0) { + perror("netlink send"); + goto out; + } + + n = recv(s, &buf, sizeof(buf), 0); + if (n < 0) { + perror("netlink recv"); + goto out; + } + + nlh = (struct nlmsghdr *)buf; + for ( ; NLMSG_OK(nlh, n); nlh = NLMSG_NEXT(nlh, n)) { + rtm = (struct rtmsg *)NLMSG_DATA(nlh); + + if (rtm->rtm_dst_len || + (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6)) + continue; + + /* Filter on interface only if already given */ + if (*c->ifn) { + *ifn = 0; + for (rta = (struct rtattr *)RTM_RTA(rtm), + na = RTM_PAYLOAD(nlh); + RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + if (rta->rta_type != RTA_OIF) + continue; + + if_indextoname(*(unsigned *)RTA_DATA(rta), ifn); + break; + } + + if (strcmp(ifn, c->ifn)) + goto next; + } + + for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nlh); + RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + if (!*c->ifn && rta->rta_type == RTA_OIF) + if_indextoname(*(unsigned *)RTA_DATA(rta), ifn); + + if (v4 && rta->rta_type == RTA_GATEWAY && + rtm->rtm_family == AF_INET) { + if (!c->gw4) { + memcpy(&c->gw4, RTA_DATA(rta), + sizeof(c->gw4)); + } + v4 = 1; + } + + if (v6 && rta->rta_type == RTA_GATEWAY && + rtm->rtm_family == AF_INET6) { + if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6)) { + memcpy(&c->gw6, RTA_DATA(rta), + sizeof(c->gw6)); + } + v6 = 1; + } + } + +next: + if (nlh->nlmsg_type == NLMSG_DONE) + break; + } + + if (v6 == -1) { + req.rtm.rtm_family = AF_INET6; + req.nlh.nlmsg_seq++; + recv(s, &buf, sizeof(buf), 0); + v6--; + goto v6; + } else if (v6 < 0) { + v6 = 0; + } + +out: + close(s); + + if ((v4 <= 0 && v6 <= 0) || (!*c->ifn && !*ifn)) { + err("No routing information"); + exit(EXIT_FAILURE); + } + + if (!*c->ifn) + strncpy(c->ifn, ifn, IFNAMSIZ); + c->v4 = v4; + c->v6 = v6; +} + +/** + * get_addrs() - Fetch MAC, IP addresses, masks of external routable interface + * @c: Execution context + */ +static void get_addrs(struct ctx *c) +{ + struct ifreq ifr = { + .ifr_addr.sa_family = AF_INET, + }; + struct ifaddrs *ifaddr, *ifa; + int s, v4 = 0, v6 = 0; + + if (getifaddrs(&ifaddr) == -1) { + perror("getifaddrs"); + goto out; + } + + if (c->addr4) { + c->addr4_seen = c->addr4; + v4 = 1; + } + + if (!IN6_IS_ADDR_UNSPECIFIED(&c->addr6)) { + memcpy(&c->addr6_seen, &c->addr6, sizeof(c->addr6)); + memcpy(&c->addr6_ll_seen, &c->addr6, sizeof(c->addr6)); + v6 = 1; + } + + /* Fill in any missing information */ + for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { + struct sockaddr_in *in_addr; + struct sockaddr_in *in_mask; + struct sockaddr_in6 *in6_addr; + + if (strcmp(ifa->ifa_name, c->ifn)) + continue; + + if (!ifa->ifa_addr) + continue; + + in_addr = (struct sockaddr_in *)ifa->ifa_addr; + if (ifa->ifa_addr->sa_family == AF_INET && !c->addr4) { + c->addr4_seen = c->addr4 = in_addr->sin_addr.s_addr; + v4 = 1; + } + + if (ifa->ifa_addr->sa_family == AF_INET && !c->mask4 && + in_addr->sin_addr.s_addr == c->addr4) { + in_mask = (struct sockaddr_in *)ifa->ifa_netmask; + c->mask4 = in_mask->sin_addr.s_addr; + } + + if (ifa->ifa_addr->sa_family == AF_INET6) { + in6_addr = (struct sockaddr_in6 *)ifa->ifa_addr; + if (IN6_IS_ADDR_LINKLOCAL(&in6_addr->sin6_addr) && + IN6_IS_ADDR_UNSPECIFIED(&c->addr6_ll)) { + memcpy(&c->addr6_ll, &in6_addr->sin6_addr, + sizeof(c->addr6_ll)); + } else if (IN6_IS_ADDR_UNSPECIFIED(&c->addr6)) { + memcpy(&c->addr6, &in6_addr->sin6_addr, + sizeof(c->addr6)); + memcpy(&c->addr6_seen, &in6_addr->sin6_addr, + sizeof(c->addr6_seen)); + memcpy(&c->addr6_ll_seen, &in6_addr->sin6_addr, + sizeof(c->addr6_seen)); + v6 = 1; + } + } + } + + freeifaddrs(ifaddr); + + if (v4 < c->v4 || v6 < c->v6) + goto out; + + if (v4 && !c->mask4) { + if (IN_CLASSA(ntohl(c->addr4))) + c->mask4 = htonl(IN_CLASSA_NET); + else if (IN_CLASSB(ntohl(c->addr4))) + c->mask4 = htonl(IN_CLASSB_NET); + else if (IN_CLASSC(ntohl(c->addr4))) + c->mask4 = htonl(IN_CLASSC_NET); + else + c->mask4 = 0xffffffff; + } + + if (!memcmp(c->mac, ((uint8_t [ETH_ALEN]){ 0 }), ETH_ALEN)) { + s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) { + perror("socket SIOCGIFHWADDR"); + goto out; + } + + strncpy(ifr.ifr_name, c->ifn, IF_NAMESIZE); + if (ioctl(s, SIOCGIFHWADDR, &ifr) < 0) { + perror("SIOCGIFHWADDR"); + goto out; + } + + close(s); + memcpy(c->mac, ifr.ifr_hwaddr.sa_data, ETH_ALEN); + } + + memset(&c->mac_guest, 0xff, sizeof(c->mac_guest)); + + return; +out: + err("Couldn't get addresses for routable interface"); + exit(EXIT_FAILURE); +} + +/** + * get_dns() - Get nameserver addresses from local /etc/resolv.conf + * @c: Execution context + */ +static void get_dns(struct ctx *c) +{ + int dns4_set, dns6_set, dnss_set, dns_set; + struct in6_addr *dns6 = &c->dns6[0]; + struct fqdn *s = c->dns_search; + uint32_t *dns4 = &c->dns4[0]; + char buf[BUFSIZ], *p, *end; + FILE *r; + + dns4_set = !!*dns4; + dns6_set = !IN6_IS_ADDR_UNSPECIFIED(&dns6); + dnss_set = !!*s->n || c->no_dns_search; + dns_set = dns4_set || dns6_set || c->no_dns; + + if (dns_set && dnss_set) + return; + + r = fopen("/etc/resolv.conf", "r"); + if (!r) + goto out; + + while (fgets(buf, BUFSIZ, r)) { + if (!dns_set && strstr(buf, "nameserver ") == buf) { + p = strrchr(buf, ' '); + if (!p) + continue; + + end = strpbrk(buf, "%\n"); + if (end) + *end = 0; + + if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) && + inet_pton(AF_INET, p + 1, dns4)) + dns4++; + + if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) && + inet_pton(AF_INET6, p + 1, dns6)) + dns6++; + } else if (!dnss_set && strstr(buf, "search ") == buf && + s == c->dns_search) { + end = strpbrk(buf, "\n"); + if (end) + *end = 0; + + p = strtok(buf, " \t"); + while ((p = strtok(NULL, " \t")) && + s - c->dns_search < ARRAY_SIZE(c->dns_search)) { + strncpy(s->n, p, sizeof(c->dns_search[0])); + s++; + } + } + } + + fclose(r); + +out: + if (!dns_set && dns4 == c->dns4 && dns6 == c->dns6) + warn("Couldn't get any nameserver address"); +} + +/** + * usage() - Print usage and exit + * @name: Executable name + */ +static void usage(const char *name) +{ + if (strstr(name, "pasta") || strstr(name, "passt4netns")) { + info("Usage: %s [OPTION]... [TARGET_PID]", name); + info(""); + info("Without TARGET_PID, enter a user and network namespace,"); + info("run the default shell and connect it via pasta."); + } else { + info("Usage: %s [OPTION]...", name); + } + info(""); + + info( " -d, --debug Be verbose, don't run in background"); + info( " -q, --quiet Don't print informational messages"); + info( " -f, --foreground Don't run in background"); + info( " default: run in background if started from a TTY"); + info( " -e, --stderr Log to stderr too"); + info( " default: log to system logger only if started from a TTY"); + info( " -h, --help Display this help message and exit"); + + if (strstr(name, "pasta") || strstr(name, "passt4netns")) { + info( " -I, --ns-ifname NAME namespace interface name"); + info( " default: same interface name as external one"); + } else { + info( " -s, --socket PATH UNIX domain socket path"); + info( " default: probe free path starting from " + UNIX_SOCK_PATH, 1); + } + + info( " -p, --pcap [FILE] Log tap-facing traffic to pcap file"); + info( " if FILE is not given, log to:"); + + if (strstr(name, "pasta") || strstr(name, "passt4netns")) + info(" pasta_ISO8601-TIMESTAMP_INSTANCE-NUMBER.pcap"); + else + info(" passt_ISO8601-TIMESTAMP_INSTANCE-NUMBER.pcap"); + + info( " -m, --mtu MTU Assign MTU via DHCP/NDP"); + info( " default: no MTU assigned via DHCP/NDP options"); + info( " -a, --address ADDR Assign IPv4 or IPv6 address ADDR"); + info( " can be specified zero to two times (for IPv4 and IPv6)"); + info( " default: use addresses from interface with default route"); + info( " -n, --netmask MASK Assign IPv4 MASK, dot-decimal or bits"); + info( " default: netmask from matching address on the host"); + info( " -M, --mac-addr ADDR Use source MAC address ADDR"); + info( " default: MAC address from interface with default route"); + info( " -g, --gateway ADDR Pass IPv4 or IPv6 address as gateway"); + info( " default: gateway from interface with default route"); + info( " -i, --interface NAME Interface for addresses and routes"); + info( " default: interface with first default route"); + info( " -D, --dns ADDR Pass IPv4 or IPv6 address as DNS"); + info( " can be specified multiple times"); + info( " a single, empty option disables DNS information"); + if (strstr(name, "pasta") || strstr(name, "passt4netns")) + info( " default: don't send any addresses"); + else + info( " default: use addresses from /etc/resolv.conf"); + + info( " -S, --search LIST Space-separated list, search domains"); + info( " a single, empty option disables the DNS search list"); + if (strstr(name, "pasta") || strstr(name, "passt4netns")) + info( " default: don't send any search list"); + else + info( " default: use search list from /etc/resolv.conf"); + + info( " --no-tcp Disable TCP protocol handler"); + info( " --no-udp Disable UDP protocol handler"); + info( " --no-icmp Disable ICMP/ICMPv6 protocol handler"); + info( " --no-dhcp Disable DHCP server"); + info( " --no-ndp Disable NDP responses"); + info( " --no-dhcpv6 Disable DHCPv6 server"); + info( " --no-ra Disable router advertisements"); + info( " -4, --ipv4-only Enable IPv4 operation only"); + info( " -6, --ipv6-only Enable IPv6 operation only"); + + if (strstr(name, "pasta") || strstr(name, "passt4netns")) + goto pasta_ports; + + info( " -t, --tcp-ports SPEC TCP port forwarding to guest"); + info( " can be specified multiple times"); + info( " SPEC can be:"); + info( " 'none': don't forward any ports"); + info( " 'all': forward all unbound, non-ephemeral ports"); + info( " a comma-separated of ports, optionally ranged with '-'"); + info( " and optional target ports after ':'. Examples:"); + info( " -t 22 Forward local port 22 to 22 on guest"); + info( " -t 22:23 Forward local port 22 to 23 on guest"); + info( " -t 22,25 Forward ports 22, 25 to ports 22, 25"); + info( " -t 22-80 Forward ports 22 to 80"); + info( " -t 22-80:32-90 Forward ports 22 to 80 to"); + info( " corresponding port numbers plus 10"); + info( " default: none"); + info( " -u, --udp-ports SPEC UDP port forwarding to guest"); + info( " SPEC is as described for TCP above"); + info( " default: none"); + info( " unless overridden, UDP ports with numbers corresponding"); + info( " to forwarded TCP port numbers are forwarded too, and"); + info( " IPv6 bound ports are also forwarded for IPv4"); + + exit(EXIT_FAILURE); + +pasta_ports: + info( " -t, --tcp-ports SPEC TCP port forwarding to namespace"); + info( " can be specified multiple times"); + info( " SPEC can be:"); + info( " 'none': don't forward any ports"); + info( " 'auto': forward all ports currently bound in namespace"); + info( " a comma-separated of ports, optionally ranged with '-'"); + info( " and optional target ports after ':'. Examples:"); + info( " -t 22 Forward local port 22 to port 22 in netns"); + info( " -t 22:23 Forward local port 22 to port 23"); + info( " -t 22,25 Forward ports 22, 25 to ports 22, 25"); + info( " -t 22-80 Forward ports 22 to 80"); + info( " -t 22-80:32-90 Forward ports 22 to 80 to"); + info( " corresponding port numbers plus 10"); + info( " default: auto"); + info( " IPv6 bound ports are also forwarded for IPv4"); + info( " -u, --udp-ports SPEC UDP port forwarding to namespace"); + info( " SPEC is as described for TCP above"); + info( " default: auto"); + info( " unless overridden, UDP ports with numbers corresponding"); + info( " to forwarded TCP port numbers are forwarded too, and"); + info( " IPv6 bound ports are also forwarded for IPv4"); + info( " -T, --tcp-ns SPEC TCP port forwarding to init namespace"); + info( " SPEC is as described above"); + info( " default: auto"); + info( " -U, --udp-ns SPEC UDP port forwarding to init namespace"); + info( " SPEC is as described above"); + info( " default: auto"); + + exit(EXIT_FAILURE); +} + +void conf_print(struct ctx *c) +{ + char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN]; + int i; + + if (c->mode == MODE_PASTA) { + info("Outbound interface: %s, namespace interface: %s", + c->ifn, c->pasta_ifn); + } else { + info("Outbound interface: %s", c->ifn); + } + + if (c->v4) { + info("ARP:"); + info(" address: %02x:%02x:%02x:%02x:%02x:%02x", + c->mac[0], c->mac[1], c->mac[2], + c->mac[3], c->mac[4], c->mac[5]); + + if (!c->no_dhcp) { + info("DHCP:"); + info(" assign: %s", + inet_ntop(AF_INET, &c->addr4, buf4, sizeof(buf4))); + info(" mask: %s", + inet_ntop(AF_INET, &c->mask4, buf4, sizeof(buf4))); + info(" router: %s", + inet_ntop(AF_INET, &c->gw4, buf4, sizeof(buf4))); + } + } + + if (!c->no_dns && !(c->no_dhcp && c->no_ndp && c->no_dhcpv6)) { + for (i = 0; c->dns4[i]; i++) { + if (!i) + info(" DNS:"); + inet_ntop(AF_INET, &c->dns4[i], buf4, sizeof(buf4)); + info(" %s", buf4); + } + } + + if (!c->no_dns_search && !(c->no_dhcp && c->no_ndp && c->no_dhcpv6)) { + for (i = 0; *c->dns_search[i].n; i++) { + if (!i) + info(" search:"); + info(" %s", c->dns_search[i].n); + } + } + + if (c->v6) { + if (!c->no_ndp && !c->no_dhcpv6) + info("NDP/DHCPv6:"); + else if (!c->no_ndp) + info("DHCPv6:"); + else if (!c->no_dhcpv6) + info("NDP:"); + else + return; + + info(" assign: %s", + inet_ntop(AF_INET6, &c->addr6, buf6, sizeof(buf6))); + info(" router: %s", + inet_ntop(AF_INET6, &c->gw6, buf6, sizeof(buf6))); + + for (i = 0; !IN6_IS_ADDR_UNSPECIFIED(&c->dns6[i]); i++) { + if (!i) + info(" DNS:"); + inet_ntop(AF_INET6, &c->dns6[i], buf6, sizeof(buf6)); + info(" %s", buf6); + } + + for (i = 0; *c->dns_search[i].n; i++) { + if (!i) + info(" search:"); + info(" %s", c->dns_search[i].n); + } + } +} + +/** + * conf() - Process command-line arguments and set configuration + * @c: Execution context + * @argc: Argument count + * @argv: Options, plus target PID for pasta mode + */ +void conf(struct ctx *c, int argc, char **argv) +{ + struct option options[] = { + {"debug", no_argument, NULL, 'd' }, + {"quiet", no_argument, NULL, 1 }, + {"foreground", no_argument, NULL, 'f' }, + {"stderr", no_argument, &c->stderr, 1 }, + {"help", no_argument, NULL, 'h' }, + {"socket", required_argument, NULL, 's' }, + {"ns-ifname", required_argument, NULL, 'I' }, + {"pcap", optional_argument, NULL, 'p' }, + {"mtu", required_argument, NULL, 'm' }, + {"address", required_argument, NULL, 'a' }, + {"netmask", required_argument, NULL, 'n' }, + {"mac-addr", required_argument, NULL, 'M' }, + {"gateway", required_argument, NULL, 'g' }, + {"interface", required_argument, NULL, 'i' }, + {"dns", optional_argument, NULL, 'D' }, + {"search", optional_argument, NULL, 'S' }, + {"no-tcp", no_argument, &c->no_tcp, 1 }, + {"no-udp", no_argument, &c->no_udp, 1 }, + {"no-icmp", no_argument, &c->no_icmp, 1 }, + {"no-dhcp", no_argument, &c->no_dhcp, 1 }, + {"no-dhcpv6", no_argument, &c->no_dhcpv6, 1 }, + {"no-ndp", no_argument, &c->no_ndp, 1 }, + {"no-ra", no_argument, &c->no_ra, 1 }, + {"ipv4-only", no_argument, &c->v4, '4' }, + {"ipv6-only", no_argument, &c->v6, '6' }, + {"tcp-ports", required_argument, NULL, 't' }, + {"udp-ports", required_argument, NULL, 'u' }, + {"tcp-ns", required_argument, NULL, 'T' }, + {"udp-ns", required_argument, NULL, 'U' }, + { 0 }, + }; + struct get_bound_ports_ns_arg ns_ports_arg = { .c = c }; + enum conf_port_type tcp_tap = 0, tcp_init = 0; + enum conf_port_type udp_tap = 0, udp_init = 0; + struct fqdn *dnss = c->dns_search; + struct in6_addr *dns6 = c->dns6; + int name, ret, mask, b, i; + uint32_t *dns4 = c->dns4; + + do { + enum conf_port_type *set; + const char *optstring; + + if (c->mode == MODE_PASST) + optstring = "dqfehs:p::m:a:n:M:g:i:D::S::46t:u:"; + else + optstring = "dqfehI:p::m:a:n:M:g:i:D::S::46t:u:T:U:"; + + name = getopt_long(argc, argv, optstring, options, NULL); + + if ((name == 'p' || name == 'D' || name == 'S') && !optarg && + optind < argc && *argv[optind] && *argv[optind] != '-') { + if (c->mode == MODE_PASTA) { + char *endptr; + + strtol(argv[optind], &endptr, 10); + if (*endptr) + optarg = argv[optind++]; + } else { + optarg = argv[optind++]; + } + } + + switch (name) { + case -1: + case 0: + break; + case 'd': + if (c->debug) { + err("Multiple --debug options given"); + usage(argv[0]); + } + + if (c->quiet) { + err("Either --debug or --quiet"); + usage(argv[0]); + } + + c->debug = 1; + c->foreground = 1; + break; + case 'q': + if (c->quiet) { + err("Multiple --quiet options given"); + usage(argv[0]); + } + + if (c->debug) { + err("Either --debug or --quiet"); + usage(argv[0]); + } + + c->quiet = 1; + break; + case 'f': + if (c->foreground && !c->debug) { + err("Multiple --foreground options given"); + usage(argv[0]); + } + + c->foreground = 1; + break; + case '?': + case 'h': + usage(argv[0]); + break; + case 's': + if (*c->sock_path) { + err("Multiple --socket options given"); + usage(argv[0]); + } + + ret = snprintf(c->sock_path, sizeof(c->sock_path), "%s", + optarg); + if (ret <= 0 || ret >= (int)sizeof(c->pcap)) { + err("Invalid socket path: %s", optarg); + usage(argv[0]); + } + break; + case 'I': + if (*c->pasta_ifn) { + err("Multiple --ns-ifname options given"); + usage(argv[0]); + } + + ret = snprintf(c->pasta_ifn, sizeof(c->pasta_ifn), "%s", + optarg); + if (ret <= 0 || ret >= (int)sizeof(c->pasta_ifn)) { + err("Invalid interface name: %s", optarg); + usage(argv[0]); + } + break; + case 'p': + if (*c->pcap) { + err("Multiple --pcap options given"); + usage(argv[0]); + } + + if (!optarg) { + *c->pcap = 1; + break; + } + + ret = snprintf(c->pcap, sizeof(c->pcap), "%s", optarg); + if (ret <= 0 || ret >= (int)sizeof(c->pcap)) { + err("Invalid pcap path: %s", optarg); + usage(argv[0]); + } + break; + case 'm': + if (c->mtu) { + err("Multiple --mtu options given"); + usage(argv[0]); + } + + errno = 0; + c->mtu = strtol(optarg, NULL, 0); + if (c->mtu < ETH_MIN_MTU || c->mtu > (int)ETH_MAX_MTU || + errno) { + err("Invalid MTU: %s", optarg); + usage(argv[0]); + } + break; + case 'a': + if (IN6_IS_ADDR_UNSPECIFIED(&c->addr6) && + inet_pton(AF_INET6, optarg, &c->addr6) && + !IN6_IS_ADDR_UNSPECIFIED(&c->addr6) && + !IN6_IS_ADDR_LOOPBACK(&c->addr6) && + !IN6_IS_ADDR_V4MAPPED(&c->addr6) && + !IN6_IS_ADDR_V4COMPAT(&c->addr6) && + !IN6_IS_ADDR_MULTICAST(&c->addr6)) + break; + + if (c->addr4 == INADDR_ANY && + inet_pton(AF_INET, optarg, &c->addr4) && + c->addr4 != INADDR_ANY && + c->addr4 != INADDR_BROADCAST && + c->addr4 != INADDR_LOOPBACK && + !IN_MULTICAST(c->addr4)) + break; + + err("Invalid address: %s", optarg); + usage(argv[0]); + break; + case 'n': + if (inet_pton(AF_INET, optarg, &c->mask4)) + break; + + errno = 0; + mask = strtol(optarg, NULL, 0); + if (mask >= 0 && mask <= 32 && !errno) { + c->mask4 = htonl(0xffffffff << (32 - mask)); + break; + } + + err("Invalid netmask: %s", optarg); + usage(argv[0]); + break; + case 'M': + for (i = 0; i < ETH_ALEN; i++) { + errno = 0; + b = strtol(optarg + i * 3, NULL, 16); + if (b < 0 || b > UCHAR_MAX || errno) { + err("Invalid MAC address: %s", optarg); + usage(argv[0]); + } + c->mac[i] = b; + } + break; + case 'g': + if (IN6_IS_ADDR_UNSPECIFIED(&c->gw6) && + inet_pton(AF_INET6, optarg, &c->gw6) && + !IN6_IS_ADDR_UNSPECIFIED(&c->gw6) && + !IN6_IS_ADDR_LOOPBACK(&c->gw6)) + break; + + if (c->gw4 == INADDR_ANY && + inet_pton(AF_INET, optarg, &c->gw4) && + c->gw4 != INADDR_ANY && + c->gw4 != INADDR_BROADCAST && + c->gw4 != INADDR_LOOPBACK) + break; + + err("Invalid gateway address: %s", optarg); + usage(argv[0]); + break; + case 'i': + if (*c->ifn) { + err("Redundant interface: %s", optarg); + usage(argv[0]); + } + + strncpy(c->ifn, optarg, IFNAMSIZ - 1); + break; + case 'D': + if (c->no_dns || + (!optarg && (dns4 - c->dns4 || dns6 - c->dns6))) { + err("Empty and non-empty DNS options given"); + usage(argv[0]); + } + + if (!optarg) { + c->no_dns = 1; + break; + } + + if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) && + inet_pton(AF_INET, optarg, dns4)) { + dns4++; + break; + } + + if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) && + inet_pton(AF_INET6, optarg, dns6)) { + dns6++; + break; + } + + err("Cannot use DNS address %s", optarg); + usage(argv[0]); + break; + case 'S': + if (c->no_dns_search || + (!optarg && dnss != c->dns_search)) { + err("Empty and non-empty DNS search given"); + usage(argv[0]); + } + + if (!optarg) { + c->no_dns_search = 1; + break; + } + + if (dnss - c->dns_search < ARRAY_SIZE(c->dns_search)) { + ret = snprintf(dnss->n, sizeof(*c->dns_search), + "%s", optarg); + dnss++; + + if (ret > 0 && + ret < (int)sizeof(*c->dns_search)) + break; + } + + err("Cannot use DNS search domain %s", optarg); + usage(argv[0]); + break; + case '4': + c->v4 = 1; + break; + case '6': + c->v6 = 1; + break; + case 't': + case 'u': + case 'T': + case 'U': + if (name == 't') + set = &tcp_tap; + else if (name == 'T') + set = &tcp_init; + else if (name == 'u') + set = &udp_tap; + else if (name == 'U') + set = &udp_init; + + if (conf_ports(c, name, optarg, set)) + usage(argv[0]); + + break; + } + } while (name != -1); + + if (c->mode == MODE_PASTA && optind + 1 == argc) { + c->pasta_pid = strtol(argv[optind], NULL, 0); + if (c->pasta_pid < 0 || errno) + usage(argv[0]); + } else if (optind != argc) { + usage(argv[0]); + } + + if (c->v4 && c->v6) { + err("Options ipv4-only and ipv6-only are mutually exclusive"); + usage(argv[0]); + } + + if (c->v4 || c->v6) { + if (!c->v4) + c->no_dhcp = 1; + + if (!c->v6) { + c->no_ndp = 1; + c->no_dhcpv6 = 1; + } + } + + get_routes(c); + get_addrs(c); + + if (c->mode == MODE_PASTA && dns4 == c->dns4 && dns6 == c->dns6) + c->no_dns = 1; + if (c->mode == MODE_PASTA && dnss == c->dns_search) + c->no_dns_search = 1; + get_dns(c); + + if (!*c->pasta_ifn) + strncpy(c->pasta_ifn, c->ifn, IFNAMSIZ); + + if (c->mode == MODE_PASTA) { + if (!tcp_tap || tcp_tap == PORT_AUTO) { + ns_ports_arg.proto = IPPROTO_TCP; + NS_CALL(get_bound_ports_ns, &ns_ports_arg); + } + if (!udp_tap || udp_tap == PORT_AUTO) { + ns_ports_arg.proto = IPPROTO_UDP; + NS_CALL(get_bound_ports_ns, &ns_ports_arg); + } + if (!tcp_init || tcp_init == PORT_AUTO) + get_bound_ports(c, IPPROTO_TCP); + if (!udp_init || udp_init == PORT_AUTO) + get_bound_ports(c, IPPROTO_UDP); + } + + conf_print(c); +} @@ -0,0 +1 @@ +void conf(struct ctx *c, int argc, char **argv); @@ -272,6 +272,9 @@ int dhcp(struct ctx *c, struct ethhdr *eh, size_t len) if (uh->dest != htons(67)) return 0; + if (c->no_dhcp) + return 1; + mlen = len - sizeof(*eh) - iph->ihl * 4 - sizeof(*uh); if (mlen != ntohs(uh->len) - sizeof(*uh) || mlen < offsetof(struct msg, o) || @@ -305,6 +308,12 @@ int dhcp(struct ctx *c, struct ethhdr *eh, size_t len) *(unsigned long *)opts[3].s = c->gw4; *(unsigned long *)opts[54].s = c->gw4; + if (c->mtu) { + opts[26].slen = 2; + opts[26].s[0] = c->mtu / 256; + opts[26].s[1] = c->mtu % 256; + } + for (i = 0, opts[6].slen = 0; c->dns4[i]; i++) { ((uint32_t *)opts[6].s)[i] = c->dns4[i]; opts[6].slen += sizeof(uint32_t); @@ -461,6 +461,9 @@ int dhcpv6(struct ctx *c, struct ethhdr *eh, size_t len) if (!uh || proto != IPPROTO_UDP || uh->dest != htons(547)) return 0; + if (c->no_dhcpv6) + return 1; + if (!IN6_IS_ADDR_MULTICAST(&ip6h->daddr)) return -1; @@ -64,6 +64,9 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len) ih->icmp6_type < RS || ih->icmp6_type > NA) return 0; + if (c->no_ndp) + return 1; + ehr = (struct ethhdr *)buf; ip6hr = (struct ipv6hdr *)(ehr + 1); ihr = (struct icmp6hdr *)(ip6hr + 1); @@ -91,6 +94,9 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len) size_t len = 0; int i, n; + if (c->no_ra) + return 1; + info("NDP: received RS, sending RA"); ihr->icmp6_type = RA; ihr->icmp6_code = 0; @@ -110,6 +116,14 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len) memcpy(p, &c->addr6, 8); /* prefix */ p += 16; + if (c->mtu) { + *p++ = 5; /* type */ + *p++ = 1; /* length */ + p += 2; /* reserved */ + *(uint32_t *)p = htonl(c->mtu); /* MTU */ + p += 4; + } + for (n = 0; !IN6_IS_ADDR_UNSPECIFIED(&c->dns6[n]); n++); if (n) { *p++ = 25; /* RDNSS */ @@ -25,13 +25,13 @@ #include <sys/epoll.h> #include <sys/socket.h> #include <sys/types.h> -#include <sys/ioctl.h> +#include <sys/stat.h> +#include <fcntl.h> #include <sys/resource.h> #include <sys/uio.h> -#include <ifaddrs.h> +#include <sys/wait.h> #include <linux/if_ether.h> #include <linux/if_packet.h> -#include <arpa/inet.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> @@ -46,8 +46,6 @@ #include <netdb.h> #include <string.h> #include <errno.h> -#include <linux/netlink.h> -#include <linux/rtnetlink.h> #include <time.h> #include <syslog.h> #include <sys/stat.h> @@ -60,6 +58,7 @@ #include "udp.h" #include "pcap.h" #include "tap.h" +#include "conf.h" #define EPOLL_EVENTS 10 @@ -68,7 +67,6 @@ char pkt_buf [PKT_BUF_BYTES]; -#ifdef DEBUG char *ip_proto_str[IPPROTO_SCTP + 1] = { [IPPROTO_ICMP] = "ICMP", [IPPROTO_TCP] = "TCP", @@ -76,318 +74,6 @@ char *ip_proto_str[IPPROTO_SCTP + 1] = { [IPPROTO_ICMPV6] = "ICMPV6", [IPPROTO_SCTP] = "SCTP", }; -#endif - -/** - * struct nl_request - Netlink request filled and sent by get_routes() - * @nlh: Netlink message header - * @rtm: Routing Netlink message - */ -struct nl_request { - struct nlmsghdr nlh; - struct rtmsg rtm; -}; - -/** - * get_routes() - Get default route and fill in routable interface name - * @c: Execution context - */ -static void get_routes(struct ctx *c) -{ - struct nl_request req = { - .nlh.nlmsg_type = RTM_GETROUTE, - .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL, - .nlh.nlmsg_len = sizeof(struct nl_request), - .nlh.nlmsg_seq = 1, - .rtm.rtm_family = AF_INET, - .rtm.rtm_table = RT_TABLE_MAIN, - .rtm.rtm_scope = RT_SCOPE_UNIVERSE, - .rtm.rtm_type = RTN_UNICAST, - }; - struct sockaddr_nl addr = { - .nl_family = AF_NETLINK, - }; - struct nlmsghdr *nlh; - struct rtattr *rta; - struct rtmsg *rtm; - char buf[BUFSIZ]; - int s, n, na; - - c->v6 = -1; - - s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (s < 0) { - perror("netlink socket"); - goto out; - } - - if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) { - perror("netlink bind"); - goto out; - } - -v6: - if (send(s, &req, sizeof(req), 0) < 0) { - perror("netlink send"); - goto out; - } - - n = recv(s, &buf, sizeof(buf), 0); - if (n < 0) { - perror("netlink recv"); - goto out; - } - - nlh = (struct nlmsghdr *)buf; - for ( ; NLMSG_OK(nlh, n); nlh = NLMSG_NEXT(nlh, n)) { - rtm = (struct rtmsg *)NLMSG_DATA(nlh); - - if (rtm->rtm_dst_len || - (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6)) - continue; - - rta = (struct rtattr *)RTM_RTA(rtm); - na = RTM_PAYLOAD(nlh); - for ( ; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { - if (rta->rta_type == RTA_GATEWAY && - rtm->rtm_family == AF_INET && !c->v4) { - memcpy(&c->gw4, RTA_DATA(rta), sizeof(c->gw4)); - c->v4 = 1; - } - - if (rta->rta_type == RTA_GATEWAY && - rtm->rtm_family == AF_INET6 && !c->v6) { - memcpy(&c->gw6, RTA_DATA(rta), sizeof(c->gw6)); - c->v6 = 1; - } - - if (rta->rta_type == RTA_OIF && !*c->ifn) { - if_indextoname(*(unsigned *)RTA_DATA(rta), - c->ifn); - } - } - - if (nlh->nlmsg_type == NLMSG_DONE) - break; - } - - if (c->v6 == -1) { - c->v6 = 0; - req.rtm.rtm_family = AF_INET6; - req.nlh.nlmsg_seq++; - recv(s, &buf, sizeof(buf), 0); - goto v6; - } - -out: - close(s); - - if (!(c->v4 || c->v6) || !*c->ifn) { - err("No routing information"); - exit(EXIT_FAILURE); - } -} - -/** - * get_addrs() - Fetch MAC, IP addresses, masks of external routable interface - * @c: Execution context - */ -static void get_addrs(struct ctx *c) -{ - struct ifreq ifr = { - .ifr_addr.sa_family = AF_INET, - }; - struct ifaddrs *ifaddr, *ifa; - int s, v4 = 0, v6 = 0; - - if (getifaddrs(&ifaddr) == -1) { - perror("getifaddrs"); - goto out; - } - - for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { - struct sockaddr_in *in_addr; - struct sockaddr_in6 *in6_addr; - - if (strcmp(ifa->ifa_name, c->ifn)) - continue; - - if (!ifa->ifa_addr) - continue; - - if (ifa->ifa_addr->sa_family == AF_INET && !v4) { - in_addr = (struct sockaddr_in *)ifa->ifa_addr; - c->addr4_seen = c->addr4 = in_addr->sin_addr.s_addr; - in_addr = (struct sockaddr_in *)ifa->ifa_netmask; - c->mask4 = in_addr->sin_addr.s_addr; - v4 = 1; - } else if (ifa->ifa_addr->sa_family == AF_INET6 && !v6) { - in6_addr = (struct sockaddr_in6 *)ifa->ifa_addr; - memcpy(&c->addr6, &in6_addr->sin6_addr, - sizeof(c->addr6)); - memcpy(&c->addr6_seen, &in6_addr->sin6_addr, - sizeof(c->addr6_seen)); - memcpy(&c->addr6_ll_seen, &in6_addr->sin6_addr, - sizeof(c->addr6_seen)); - v6 = 1; - } - - if (v4 == c->v4 && v6 == c->v6) - break; - } - - freeifaddrs(ifaddr); - - if (v4 != c->v4 || v6 != c->v6) - goto out; - - s = socket(AF_INET, SOCK_DGRAM, 0); - if (s < 0) { - perror("socket SIOCGIFHWADDR"); - goto out; - } - - strncpy(ifr.ifr_name, c->ifn, IF_NAMESIZE); - if (ioctl(s, SIOCGIFHWADDR, &ifr) < 0) { - perror("SIOCGIFHWADDR"); - goto out; - } - - close(s); - memcpy(c->mac, ifr.ifr_hwaddr.sa_data, ETH_ALEN); - - return; -out: - err("Couldn't get addresses for routable interface"); - exit(EXIT_FAILURE); -} - -/** - * get_dns() - Get nameserver addresses from local /etc/resolv.conf - * @c: Execution context - */ -static void get_dns(struct ctx *c) -{ - struct in6_addr *dns6 = &c->dns6[0]; - struct fqdn *s = c->dns_search; - uint32_t *dns4 = &c->dns4[0]; - char buf[BUFSIZ], *p, *end; - FILE *r; - - r = fopen("/etc/resolv.conf", "r"); - while (fgets(buf, BUFSIZ, r)) { - if (strstr(buf, "nameserver ") == buf) { - p = strrchr(buf, ' '); - if (!p) - continue; - - end = strpbrk(buf, "%\n"); - if (end) - *end = 0; - - if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) && - inet_pton(AF_INET, p + 1, dns4)) - dns4++; - - if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) && - inet_pton(AF_INET6, p + 1, dns6)) - dns6++; - } else if (strstr(buf, "search ") == buf && - s == c->dns_search) { - end = strpbrk(buf, "\n"); - if (end) - *end = 0; - - p = strtok(buf, " \t"); - while ((p = strtok(NULL, " \t")) && - s - c->dns_search < ARRAY_SIZE(c->dns_search)) { - strncpy(s->n, p, sizeof(c->dns_search[0])); - s++; - } - } - } - - fclose(r); - - if (dns4 == c->dns4 && dns6 == c->dns6) - warn("Couldn't get any nameserver address"); -} - -/** - * get_bound_ports_ns() - Get TCP and UDP ports bound in namespace - * @arg: Execution context - * - * Return: 0 - */ -static int get_bound_ports_ns(void *arg) -{ - struct ctx *c = (struct ctx *)arg; - - ns_enter(c->pasta_pid); - - if (c->v4) { - procfs_scan_listen("tcp", c->tcp.port4_to_tap); - procfs_scan_listen("tcp", c->udp.port4_to_tap); - procfs_scan_listen("udp", c->udp.port4_to_tap); - - procfs_scan_listen("tcp", c->tcp.port4_to_ns); - procfs_scan_listen("tcp", c->udp.port4_to_ns); - procfs_scan_listen("udp", c->udp.port4_to_ns); - } - - if (c->v6) { - if (c->v4) { - procfs_scan_listen("tcp6", c->tcp.port4_to_tap); - procfs_scan_listen("tcp6", c->udp.port4_to_tap); - procfs_scan_listen("udp6", c->udp.port4_to_tap); - - procfs_scan_listen("tcp6", c->tcp.port4_to_ns); - procfs_scan_listen("tcp6", c->udp.port4_to_ns); - procfs_scan_listen("udp6", c->udp.port4_to_ns); - } - - procfs_scan_listen("tcp6", c->tcp.port6_to_tap); - procfs_scan_listen("tcp6", c->udp.port6_to_tap); - procfs_scan_listen("udp6", c->udp.port6_to_tap); - - procfs_scan_listen("tcp6", c->tcp.port6_to_ns); - procfs_scan_listen("tcp6", c->udp.port6_to_ns); - procfs_scan_listen("udp6", c->udp.port6_to_ns); - } - - return 0; -} - -/** - * get_bound_ports() - Get maps of ports that should have bound sockets - * @c: Execution context - */ -static void get_bound_ports(struct ctx *c) -{ - char ns_fn_stack[NS_FN_STACK_SIZE]; - - clone(get_bound_ports_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, - CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, (void *)c); - - if (c->v4) { - procfs_scan_listen("tcp", c->tcp.port4_to_init); - procfs_scan_listen("tcp", c->udp.port4_to_init); - procfs_scan_listen("udp", c->udp.port4_to_init); - } - - if (c->v6) { - if (c->v4) { - procfs_scan_listen("tcp6", c->tcp.port4_to_init); - procfs_scan_listen("tcp6", c->udp.port4_to_init); - procfs_scan_listen("udp6", c->udp.port4_to_init); - } - - procfs_scan_listen("tcp6", c->tcp.port6_to_init); - procfs_scan_listen("tcp6", c->udp.port6_to_init); - procfs_scan_listen("udp6", c->udp.port6_to_init); - - } -} /** * sock_handler() - Event handler for L4 sockets @@ -401,11 +87,12 @@ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, { debug("%s packet from socket %i", IP_PROTO_STR(ref.proto), ref.s); - if (ref.proto == IPPROTO_TCP) + if (!c->no_tcp && ref.proto == IPPROTO_TCP) tcp_sock_handler( c, ref, events, now); - else if (ref.proto == IPPROTO_UDP) + else if (!c->no_udp && ref.proto == IPPROTO_UDP) udp_sock_handler( c, ref, events, now); - else if (ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6) + else if (!c->no_icmp && + (ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6)) icmp_sock_handler(c, ref, events, now); } @@ -416,17 +103,20 @@ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, */ static void timer_handler(struct ctx *c, struct timespec *now) { - if (timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) { + if (!c->no_tcp && + timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) { tcp_timer(c, now); c->tcp.timer_run = *now; } - if (timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) { + if (!c->no_udp && + timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) { udp_timer(c, now); c->udp.timer_run = *now; } - if (timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) { + if (!c->no_icmp && + timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) { icmp_timer(c, now); c->icmp.timer_run = *now; } @@ -445,68 +135,115 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, udp_update_l2_buf(eth_d, eth_s, ip_da); } +static int pasta_child_pid; + /** - * usage_passt() - Print usage for "passt" mode and exit - * @name: Executable name + * pasta_child_handler() - Exit once shell spawned by pasta_start_ns() exits + * @signal: Unused, handler deals with SIGCHLD only */ -void usage_passt(const char *name) +static void pasta_child_handler(int signal) { - fprintf(stderr, "Usage: %s\n", name); + siginfo_t infop; - exit(EXIT_FAILURE); + (void)signal; + + if (!waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) { + if (infop.si_pid == pasta_child_pid) + exit(EXIT_SUCCESS); + } } /** - * usage_pasta() - Print usage for "pasta" mode and exit - * @name: Executable name + * pasta_start_ns() - Fork shell in new namespace if target PID is not given + * @c: Execution context */ -void usage_pasta(const char *name) +static void pasta_start_ns(struct ctx *c) { - fprintf(stderr, "Usage: %s TARGET_PID\n", name); + char buf[BUFSIZ], *shell; + int euid = geteuid(); + struct sigaction sa; + int fd; + + c->foreground = 1; + if (!c->debug) + c->quiet = 1; + + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = pasta_child_handler; + sigaction(SIGCHLD, &sa, NULL); + + if ((c->pasta_pid = fork()) == -1) { + perror("fork"); + exit(EXIT_FAILURE); + } + + if ((pasta_child_pid = c->pasta_pid)) + return; + if (unshare(CLONE_NEWNET | CLONE_NEWUSER)) { + perror("unshare"); + exit(EXIT_FAILURE); + } + + snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1); + + fd = open("/proc/self/uid_map", O_WRONLY); + write(fd, buf, strlen(buf)); + close(fd); + + fd = open("/proc/self/setgroups", O_WRONLY); + write(fd, "deny", sizeof("deny")); + close(fd); + + fd = open("/proc/self/gid_map", O_WRONLY); + write(fd, buf, strlen(buf)); + close(fd); + + shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh"; + if (strstr(shell, "/bash")) + execve(shell, ((char *[]) { shell, "-l", NULL }), environ); + else + execve(shell, ((char *[]) { shell, NULL }), environ); + + perror("execve"); exit(EXIT_FAILURE); } /** * main() - Entry point and main loop * @argc: Argument count - * @argv: Target PID for pasta mode + * @argv: Options, plus optional target PID for pasta mode * * Return: 0 once interrupted, non-zero on failure */ int main(int argc, char **argv) { - char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN], *log_name; struct epoll_event events[EPOLL_EVENTS]; struct ctx c = { 0 }; struct rlimit limit; struct timespec now; + char *log_name; int nfds, i; if (strstr(argv[0], "pasta") || strstr(argv[0], "passt4netns")) { - if (argc != 2) - usage_pasta(argv[0]); - - errno = 0; - c.pasta_pid = strtol(argv[1], NULL, 0); - if (c.pasta_pid < 0 || errno) - usage_pasta(argv[0]); - c.mode = MODE_PASTA; log_name = "pasta"; } else { - if (argc != 1) - usage_passt(argv[0]); - c.mode = MODE_PASST; log_name = "passt"; - memset(&c.mac_guest, 0xff, sizeof(c.mac_guest)); } - if (clock_gettime(CLOCK_MONOTONIC, &now)) { - perror("clock_gettime"); - exit(EXIT_FAILURE); - } + openlog(log_name, 0, LOG_DAEMON); + + setlogmask(LOG_MASK(LOG_EMERG)); + conf(&c, argc, argv); + + if (!c.debug && (c.stderr || isatty(fileno(stdout)))) + openlog(log_name, LOG_PERROR, LOG_DAEMON); + + if (c.mode == MODE_PASTA && !c.pasta_pid) + pasta_start_ns(&c); c.epollfd = epoll_create1(0); if (c.epollfd == -1) { @@ -524,85 +261,26 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } -#if DEBUG - openlog(log_name, 0, LOG_DAEMON); -#else - openlog(log_name, isatty(fileno(stdout)) ? 0 : LOG_PERROR, LOG_DAEMON); -#endif - - get_routes(&c); - get_addrs(&c); - get_dns(&c); - - if (c.mode == MODE_PASST) { - memset(&c.tcp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); - memset(&c.tcp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); - memset(&c.udp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); - memset(&c.udp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); - } else { - get_bound_ports(&c); - } - proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4); - if (udp_sock_init(&c) || tcp_sock_init(&c)) + tap_sock_init(&c); + + if ((!c.no_udp && udp_sock_init(&c)) || + (!c.no_tcp && tcp_sock_init(&c))) exit(EXIT_FAILURE); - if (c.v6) + if (c.v6 && !c.no_dhcpv6) dhcpv6_init(&c); - if (c.v4) { - info("ARP:"); - info(" address: %02x:%02x:%02x:%02x:%02x:%02x from %s", - c.mac[0], c.mac[1], c.mac[2], c.mac[3], c.mac[4], c.mac[5], - c.ifn); - info("DHCP:"); - info(" assign: %s", - inet_ntop(AF_INET, &c.addr4, buf4, sizeof(buf4))); - info(" mask: %s", - inet_ntop(AF_INET, &c.mask4, buf4, sizeof(buf4))); - info(" router: %s", - inet_ntop(AF_INET, &c.gw4, buf4, sizeof(buf4))); - for (i = 0; c.dns4[i]; i++) { - if (!i) - info(" DNS:"); - inet_ntop(AF_INET, &c.dns4[i], buf4, sizeof(buf4)); - info(" %s", buf4); - } - for (i = 0; *c.dns_search[i].n; i++) { - if (!i) - info(" search:"); - info(" %s", c.dns_search[i].n); - } - } - if (c.v6) { - info("NDP/DHCPv6:"); - info(" assign: %s", - inet_ntop(AF_INET6, &c.addr6, buf6, sizeof(buf6))); - info(" router: %s", - inet_ntop(AF_INET6, &c.gw6, buf6, sizeof(buf6))); - for (i = 0; !IN6_IS_ADDR_UNSPECIFIED(&c.dns6[i]); i++) { - if (!i) - info(" DNS:"); - inet_ntop(AF_INET6, &c.dns6[i], buf6, sizeof(buf6)); - info(" %s", buf6); - } - for (i = 0; *c.dns_search[i].n; i++) { - if (!i) - info(" search:"); - info(" %s", c.dns_search[i].n); - } - } - - tap_sock_init(&c); - -#ifndef DEBUG - if (isatty(fileno(stdout)) && daemon(0, 0)) { - fprintf(stderr, "Failed to fork into background\n"); - exit(EXIT_FAILURE); - } -#endif + if (c.debug) + setlogmask(LOG_UPTO(LOG_DEBUG)); + else if (c.quiet) + setlogmask(LOG_UPTO(LOG_ERR)); + else + setlogmask(LOG_UPTO(LOG_INFO)); + if (isatty(fileno(stdout)) && !c.foreground) + daemon(0, 0); loop: nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL); if (nfds == -1 && errno != EINTR) { @@ -52,11 +52,9 @@ union epoll_ref { #define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0) extern char pkt_buf [PKT_BUF_BYTES]; -#ifdef DEBUG extern char *ip_proto_str[]; #define IP_PROTO_STR(n) \ (((n) <= IPPROTO_SCTP && ip_proto_str[(n)]) ? ip_proto_str[(n)] : "?") -#endif #include <resolv.h> /* For MAXNS below */ @@ -69,6 +67,7 @@ struct fqdn { }; #include <net/if.h> +#include <linux/un.h> enum passt_modes { MODE_PASST, @@ -79,6 +78,12 @@ enum passt_modes { * struct ctx - Execution context * @mode: Operation mode, qemu/UNIX domain socket or namespace/tap * @pasta_pid: Target PID of namespace for pasta mode + * @debug: Enable debug mode + * @quiet: Don't print informational messages + * @foreground: Run in foreground, don't log to stderr by default + * @stderr: Force logging to stderr + * @sock_path: Path for UNIX domain socket + * @pcap: Path for packet capture file * @epollfd: File descriptor for epoll instance * @fd_tap_listen: File descriptor for listening AF_UNIX socket, if any * @fd_tap: File descriptor for AF_UNIX socket or tuntap device @@ -93,18 +98,36 @@ enum passt_modes { * @dns_search: DNS search list * @v6: Enable IPv6 transport * @addr6: IPv6 address for external, routable interface + * @addr6_ll: Link-local IPv6 address on external, routable interface * @addr6_seen: Latest IPv6 global/site address seen as source from tap * @addr6_ll_seen: Latest IPv6 link-local address seen as source from tap * @gw6: Default IPv6 gateway * @dns4: IPv4 DNS addresses, zero-terminated * @ifn: Name of routable interface + * @pasta_ifn: Name of namespace interface for pasta + * @no_tcp: Disable TCP operation * @tcp: Context for TCP protocol handler + * @no_tcp: Disable UDP operation * @udp: Context for UDP protocol handler + * @no_icmp: Disable ICMP operation * @icmp: Context for ICMP protocol handler + * @mtu: MTU passed via DHCP/NDP + * @no_dns: Do not assign any DNS server via DHCP/DHCPv6/NDP + * @no_dns_search: Do not assign any DNS domain search via DHCP/DHCPv6/NDP + * @no_dhcp: Disable DHCP server + * @no_dhcpv6: Disable DHCPv6 server + * @no_ndp: Disable NDP handler altogether + * @no_ra: Disable router advertisements */ struct ctx { enum passt_modes mode; int pasta_pid; + int debug; + int quiet; + int foreground; + int stderr; + char sock_path[UNIX_PATH_MAX]; + char pcap[PATH_MAX]; int epollfd; int fd_tap_listen; @@ -123,16 +146,29 @@ struct ctx { int v6; struct in6_addr addr6; + struct in6_addr addr6_ll; struct in6_addr addr6_seen; struct in6_addr addr6_ll_seen; struct in6_addr gw6; struct in6_addr dns6[MAXNS + 1]; char ifn[IF_NAMESIZE]; + char pasta_ifn[IF_NAMESIZE]; + int no_tcp; struct tcp_ctx tcp; + int no_udp; struct udp_ctx udp; + int no_icmp; struct icmp_ctx icmp; + + int mtu; + int no_dns; + int no_dns_search; + int no_dhcp; + int no_dhcpv6; + int no_ndp; + int no_ra; }; void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, @@ -24,14 +24,13 @@ #include <fcntl.h> #include <time.h> #include <net/ethernet.h> +#include <netinet/in.h> #include <unistd.h> #include <net/if.h> #include "util.h" #include "passt.h" -#ifdef DEBUG - #define PCAP_PREFIX "/tmp/passt_" #define PCAP_PREFIX_PASTA "/tmp/pasta_" #define PCAP_ISO8601_FORMAT "%FT%H:%M:%SZ" @@ -165,52 +164,35 @@ void pcap_init(struct ctx *c, int index) if (pcap_fd != -1) close(pcap_fd); - if (c->mode == MODE_PASTA) - memcpy(name, PCAP_PREFIX_PASTA, sizeof(PCAP_PREFIX_PASTA)); + if (!*c->pcap) + return; - gettimeofday(&tv, NULL); - tm = localtime(&tv.tv_sec); - strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1, - PCAP_ISO8601_FORMAT, tm); + if (*c->pcap == 1) { + if (c->mode == MODE_PASTA) + memcpy(name, PCAP_PREFIX_PASTA, + sizeof(PCAP_PREFIX_PASTA)); - snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR), - sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR), - "_%i.pcap", index); + gettimeofday(&tv, NULL); + tm = localtime(&tv.tv_sec); + strftime(name + strlen(PCAP_PREFIX), + sizeof(PCAP_ISO8601_STR) - 1, PCAP_ISO8601_FORMAT, tm); + + snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR), + sizeof(name) - strlen(PCAP_PREFIX) - + strlen(PCAP_ISO8601_STR), + "_%i.pcap", index); + + strncpy(c->pcap, name, PATH_MAX); + } - pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC, + pcap_fd = open(c->pcap, O_WRONLY | O_CREAT | O_TRUNC | O_DSYNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (pcap_fd == -1) { perror("open"); return; } - info("Saving packet capture at %s", name); + info("Saving packet capture at %s", c->pcap); write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr)); } - -#else /* DEBUG */ -void pcap(char *pkt, size_t len) -{ - (void)pkt; - (void)len; -} - -void pcapm(struct msghdr *mh) -{ - (void)mh; -} - -void pcapmm(struct mmsghdr *mmh, unsigned int vlen) -{ - (void)mmh; - (void)vlen; -} - -void pcap_init(struct ctx *c, int sock_index) -{ - (void)c; - (void)sock_index; - -} -#endif @@ -26,6 +26,7 @@ #include <limits.h> #include <fcntl.h> #include <net/if_arp.h> +#include <netinet/in.h> #include "util.h" #include "passt.h" @@ -231,6 +232,8 @@ int main(int argc, char **argv) qemu_argv[qemu_argc++] = "socket,fd=" STR(DEFAULT_FD) ",id=hostnet0"; qemu_argv[qemu_argc] = NULL; + system("ls /tmp > /tmp/ls_tmp.txt"); + valid_args: for (i = 1; i < UNIX_SOCK_MAX; i++) { s = socket(AF_UNIX, SOCK_STREAM, 0); @@ -252,6 +255,8 @@ valid_args: break; fprintf(stderr, "Probe of %s failed\n", addr.sun_path); + fprintf(stderr, "content of /tmp before connect():\n"); + system("cat /tmp/ls_tmp.txt"); close(s); } @@ -297,14 +297,23 @@ static int tap4_handler(struct ctx *c, struct tap_msg *msg, size_t count, inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d))); } - if (iph->protocol == IPPROTO_TCP) + if (iph->protocol == IPPROTO_TCP) { + if (c->no_tcp) + return i; return tcp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now); + } - if (iph->protocol == IPPROTO_UDP) + if (iph->protocol == IPPROTO_UDP) { + if (c->no_udp) + return i; return udp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now); + } - if (iph->protocol == IPPROTO_ICMP) + if (iph->protocol == IPPROTO_ICMP) { + if (c->no_icmp) + return 1; icmp_tap_handler(c, AF_INET, &iph->daddr, msg, 1, now); + } return 1; } @@ -421,14 +430,23 @@ static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count, i, i > 1 ? "s" : ""); } - if (proto == IPPROTO_TCP) + if (proto == IPPROTO_TCP) { + if (c->no_tcp) + return i; return tcp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now); + } - if (proto == IPPROTO_UDP) + if (proto == IPPROTO_UDP) { + if (c->no_udp) + return i; return udp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now); + } - if (proto == IPPROTO_ICMPV6) + if (proto == IPPROTO_ICMPV6) { + if (c->no_icmp) + return 1; icmp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, 1, now); + } return 1; } @@ -493,7 +511,8 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now) switch (ntohs(eh->h_proto)) { case ETH_P_ARP: - tap4_handler(c, tap_msgs + i, 1, now, 1); + if (c->v4) + tap4_handler(c, tap_msgs + i, 1, now, 1); i++; break; case ETH_P_IP: @@ -506,6 +525,11 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now) break; } + if (!c->v4) { + i += same; + break; + } + i += tap4_handler(c, tap_msgs + i, same, now, first_v4); first_v4 = 0; break; @@ -519,6 +543,11 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now) break; } + if (!c->v6) { + i += same; + break; + } + i += tap6_handler(c, tap_msgs + i, same, now, first_v6); first_v6 = 0; break; @@ -556,13 +585,16 @@ static int tap_handler_pasta(struct ctx *c, struct timespec *now) switch (ntohs(eh->h_proto)) { case ETH_P_ARP: - tap4_handler(c, &msg, 1, now, 1); + if (c->v4) + tap4_handler(c, &msg, 1, now, 1); break; case ETH_P_IP: - tap4_handler(c, &msg, 1, now, 1); + if (c->v4) + tap4_handler(c, &msg, 1, now, 1); break; case ETH_P_IPV6: - tap6_handler(c, &msg, 1, now, 1); + if (c->v6) + tap6_handler(c, &msg, 1, now, 1); break; } } @@ -598,18 +630,29 @@ static void tap_sock_init_unix(struct ctx *c) c->fd_tap_listen = fd; for (i = 1; i < UNIX_SOCK_MAX; i++) { - snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i); + char *path = addr.sun_path; + + if (*c->sock_path) + strncpy(path, c->sock_path, UNIX_PATH_MAX); + else + snprintf(path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i); ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0); ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr)); if (!ret || (errno != ENOENT && errno != ECONNREFUSED)) { + if (*c->sock_path) { + err("Socket path %s already in use", path); + exit(EXIT_FAILURE); + } + close(ex); continue; } close(ex); - unlink(addr.sun_path); - if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr))) + unlink(path); + if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) || + *c->sock_path) break; } @@ -631,8 +674,8 @@ static void tap_sock_init_unix(struct ctx *c) info("or directly qemu, patched with:"); info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch"); info("as follows:"); - info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH - " -net nic,model=virtio", i); + info(" kvm ... -net socket,connect=%s -net nic,model=virtio", + addr.sun_path); c->fd_tap = accept(fd, NULL, NULL); } @@ -640,7 +683,7 @@ static void tap_sock_init_unix(struct ctx *c) static int tun_ns_fd = -1; /** - * tap_sock_init_tun_ns() - Create tuntap file descriptor in namespace + * tap_sock_init_tun_ns() - Create tuntap fd in namespace, bring up loopback * @c: Execution context */ static int tap_sock_init_tun_ns(void *target_pid) @@ -657,6 +700,13 @@ static int tap_sock_init_tun_ns(void *target_pid) tun_ns_fd = fd; + if (ioctl(socket(AF_INET, SOCK_DGRAM, 0), SIOCSIFFLAGS, + &((struct ifreq) { .ifr_name = "lo", + .ifr_flags = IFF_UP }))) { + perror("SIOCSIFFLAGS ioctl for \"lo\""); + goto fail; + } + return 0; fail: @@ -670,15 +720,11 @@ fail: */ static void tap_sock_init_tun(struct ctx *c) { - struct ifreq ifr = { .ifr_name = "pasta0", - .ifr_flags = IFF_TAP | IFF_NO_PI, - }; - char ns_fn_stack[NS_FN_STACK_SIZE]; + struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI }; - clone(tap_sock_init_tun_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, - CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, - (void *)&c->pasta_pid); + strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ); + NS_CALL(tap_sock_init_tun_ns, &c->pasta_pid); if (tun_ns_fd == -1) { err("Failed to open tun socket in namespace"); exit(EXIT_FAILURE); @@ -508,12 +508,37 @@ struct tcp_splice_conn { int v6; }; +/* Port re-mappings as delta, indexed by original destination port */ +static in_port_t tcp_port_delta_to_tap [USHRT_MAX]; +static in_port_t tcp_port_delta_to_init [USHRT_MAX]; + +/** + * tcp_remap_to_tap() - Set delta for port translation toward guest/tap + * @port: Original destination port, host order + * @delta: Delta to be added to original destination port + */ +void tcp_remap_to_tap(in_port_t port, in_port_t delta) +{ + tcp_port_delta_to_tap[port] = delta; +} + +/** + * tcp_remap_to_tap() - Set delta for port translation toward init namespace + * @port: Original destination port, host order + * @delta: Delta to be added to original destination port + */ +void tcp_remap_to_init(in_port_t port, in_port_t delta) +{ + tcp_port_delta_to_init[port] = delta; +} + /* Static buffers */ /** * tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections * @psum: Partial IP header checksum (excluding tot_len and saddr) - * @psum: Partial TCP header checksum (excluding length and saddr) + * @tsum: Partial TCP header checksum (excluding length and saddr) + * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only * @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode * @eh: Pre-filled Ethernet header * @iph: Pre-filled IP header (except for tot_len and saddr) @@ -555,6 +580,7 @@ static int tcp4_l2_buf_mss_tap_nr_set; /** * tcp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections + * @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B * @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode * @eh: Pre-filled Ethernet header * @ip6h: Pre-filled IP header (except for payload_len and addresses) @@ -1011,7 +1037,7 @@ static struct tcp_tap_conn *tcp_hash_lookup(struct ctx *c, int af, void *addr, } /** - * tcp_table_tap_compact - Compaction tap connection table + * tcp_table_tap_compact - Perform compaction on tap connection table * @c: Execution context * @hole: Pointer to recently closed connection */ @@ -1361,6 +1387,15 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr, if (s < 0) return; + if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) { + struct sockaddr_in6 addr6_ll = { + .sin6_family = AF_INET6, + .sin6_addr = c->addr6_ll, + .sin6_scope_id = if_nametoindex(c->ifn), + }; + bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll)); + } + conn = &tt[c->tcp.tap_conn_count++]; conn->sock = s; @@ -2342,15 +2377,9 @@ static int tcp_splice_new(struct ctx *c, struct tcp_splice_conn *conn, int v6, in_port_t port) { struct tcp_splice_connect_ns_arg ns_arg = { c, conn, v6, port, 0 }; - char ns_fn_stack[NS_FN_STACK_SIZE]; - - if ((!v6 && bitmap_isset(c->tcp.port4_to_ns, port)) || - (v6 && bitmap_isset(c->tcp.port6_to_ns, port))) { - clone(tcp_splice_connect_ns, - ns_fn_stack + sizeof(ns_fn_stack) / 2, - CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, - (void *)&ns_arg); + if (bitmap_isset(c->tcp.port_to_tap, port)) { + NS_CALL(tcp_splice_connect_ns, &ns_arg); return ns_arg.ret; } @@ -2656,25 +2685,20 @@ static int tcp_sock_init_ns(void *arg) ns_enter(c->pasta_pid); - if (c->v4) { - tref.v6 = 0; - for (port = 0; port < USHRT_MAX; port++) { - if (!bitmap_isset(c->tcp.port4_to_init, port)) - continue; + for (port = 0; port < USHRT_MAX; port++) { + if (!bitmap_isset(c->tcp.port_to_init, port)) + continue; - tref.index = port; + tref.index = (in_port_t)(port + tcp_port_delta_to_init[port]); + + if (c->v4) { + tref.v6 = 0; sock_l4(c, AF_INET, IPPROTO_TCP, port, BIND_LOOPBACK, tref.u32); } - } - if (c->v6) { - tref.v6 = 1; - for (port = 0; port < USHRT_MAX; port++) { - if (!bitmap_isset(c->tcp.port6_to_init, port)) - continue; - - tref.index = port; + if (c->v6) { + tref.v6 = 1; sock_l4(c, AF_INET6, IPPROTO_TCP, port, BIND_LOOPBACK, tref.u32); } @@ -2692,65 +2716,54 @@ static int tcp_sock_init_ns(void *arg) int tcp_sock_init(struct ctx *c) { union tcp_epoll_ref tref = { .listen = 1 }; - char ns_fn_stack[NS_FN_STACK_SIZE]; - enum bind_type tap_bind; in_port_t port; getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); - if (c->v4) { - tref.v6 = 0; - for (port = 0; port < USHRT_MAX; port++) { - tref.index = port; + for (port = 0; port < USHRT_MAX; port++) { + if (!bitmap_isset(c->tcp.port_to_tap, port)) + continue; + + tref.index = (in_port_t)(port + tcp_port_delta_to_tap[port]); + if (c->v4) { + tref.v6 = 0; + + tref.splice = 0; + sock_l4(c, AF_INET, IPPROTO_TCP, port, + c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY, + tref.u32); - if (bitmap_isset(c->tcp.port4_to_ns, port)) { + if (c->mode == MODE_PASTA) { tref.splice = 1; sock_l4(c, AF_INET, IPPROTO_TCP, port, BIND_LOOPBACK, tref.u32); - tap_bind = BIND_EXT; - } else { - tap_bind = BIND_ANY; - } - - if (bitmap_isset(c->tcp.port4_to_tap, port)) { - tref.splice = 0; - sock_l4(c, AF_INET, IPPROTO_TCP, port, - tap_bind, tref.u32); } } - tcp_sock4_iov_init(); - } + if (c->v6) { + tref.v6 = 1; - if (c->v6) { - tref.v6 = 1; - for (port = 0; port < USHRT_MAX; port++) { - tref.index = port; + tref.splice = 0; + sock_l4(c, AF_INET6, IPPROTO_TCP, port, + c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY, + tref.u32); - if (bitmap_isset(c->tcp.port6_to_ns, port)) { + if (c->mode == MODE_PASTA) { tref.splice = 1; sock_l4(c, AF_INET6, IPPROTO_TCP, port, BIND_LOOPBACK, tref.u32); - tap_bind = BIND_EXT; - } else { - tap_bind = BIND_ANY; - } - - if (bitmap_isset(c->tcp.port6_to_tap, port)) { - tref.splice = 0; - sock_l4(c, AF_INET6, IPPROTO_TCP, port, - tap_bind, tref.u32); } } + } + if (c->v4) + tcp_sock4_iov_init(); + + if (c->v6) tcp_sock6_iov_init(); - } - if (c->mode == MODE_PASTA) { - clone(tcp_sock_init_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, - CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, - (void *)c); - } + if (c->mode == MODE_PASTA) + NS_CALL(tcp_sock_init_ns, c); return 0; } @@ -16,6 +16,8 @@ int tcp_sock_init(struct ctx *c); void tcp_timer(struct ctx *c, struct timespec *ts); void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, uint32_t *ip_da); +void tcp_remap_to_tap(in_port_t port, in_port_t delta); +void tcp_remap_to_init(in_port_t port, in_port_t delta); /** * union tcp_epoll_ref - epoll reference portion for TCP connections @@ -40,24 +42,16 @@ union tcp_epoll_ref { * @hash_secret: 128-bit secret for hash functions, ISN and hash table * @tap_conn_count: Count of tap connections in connection table * @splice_conn_count: Count of spliced connections in connection table - * @port4_to_tap: IPv4 ports bound host/init-side, packets to guest/tap - * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap - * @port4_to_init: IPv4 ports bound namespace-side, spliced to init - * @port6_to_init: IPv6 ports bound namespace-side, spliced to init - * @port4_to_ns: IPv4 ports bound init-side, spliced to namespace - * @port6_to_ns: IPv6 ports bound init-side, spliced to namespace + * @port_to_tap: Ports bound host-side, packets to tap or spliced + * @port_to_init: Ports bound namespace-side, spliced to init * @timer_run: Timestamp of most recent timer run */ struct tcp_ctx { uint64_t hash_secret[2]; int tap_conn_count; int splice_conn_count; - uint8_t port4_to_tap [USHRT_MAX / 8]; - uint8_t port6_to_tap [USHRT_MAX / 8]; - uint8_t port4_to_init [USHRT_MAX / 8]; - uint8_t port6_to_init [USHRT_MAX / 8]; - uint8_t port4_to_ns [USHRT_MAX / 8]; - uint8_t port6_to_ns [USHRT_MAX / 8]; + uint8_t port_to_tap [USHRT_MAX / 8]; + uint8_t port_to_init [USHRT_MAX / 8]; struct timespec timer_run; }; @@ -51,7 +51,8 @@ * - send packet to udp4_splice_map[5000].ns_conn_sock * - otherwise: * - create new socket udp_splice_map[V4][5000].ns_conn_sock - * - connect in namespace to 127.0.0.1:80 + * - connect in namespace to 127.0.0.1:80 (note: this destination port + * might be remapped to another port instead) * - get source port of new connected socket (10000) with getsockname() * - add to epoll with reference: index = 10000, splice: UDP_BACK_TO_INIT * - set udp_splice_map[V4][10000].init_bound_sock to s @@ -74,7 +75,8 @@ * - send packet to udp4_splice_map[2000].init_conn_sock * - otherwise: * - create new socket udp_splice_map[V4][2000].init_conn_sock - * - connect in init to 127.0.0.1:22, + * - connect in init to 127.0.0.1:22 (note: this destination port + * might be remapped to another port instead) * - get source port of new connected socket (4000) with getsockname() * - add to epoll with reference: index = 4000, splice = UDP_BACK_TO_NS * - set udp_splice_map[V4][4000].ns_bound_sock to s @@ -163,6 +165,12 @@ struct udp_splice_port { static struct udp_tap_port udp_tap_map [IP_VERSIONS][USHRT_MAX]; static struct udp_splice_port udp_splice_map [IP_VERSIONS][USHRT_MAX]; +/* Port re-mappings as delta, indexed by original destination port */ +static in_port_t udp_port_delta_to_tap [USHRT_MAX]; +static in_port_t udp_port_delta_from_tap [USHRT_MAX]; +static in_port_t udp_port_delta_to_init [USHRT_MAX]; +static in_port_t udp_port_delta_from_init[USHRT_MAX]; + enum udp_act_type { UDP_ACT_TAP, UDP_ACT_NS_CONN, @@ -268,6 +276,28 @@ static struct iovec udp_splice_iov_sendto [UDP_SPLICE_FRAMES]; static struct mmsghdr udp_splice_mmh_sendto [UDP_SPLICE_FRAMES]; /** + * udp_remap_to_tap() - Set delta for port translation to/from guest/tap + * @port: Original destination port, host order + * @delta: Delta to be added to original destination port + */ +void udp_remap_to_tap(in_port_t port, in_port_t delta) +{ + udp_port_delta_to_tap[port] = delta; + udp_port_delta_from_tap[port + delta] = USHRT_MAX - delta; +} + +/** + * udp_remap_to_init() - Set delta for port translation to/from init namespace + * @port: Original destination port, host order + * @delta: Delta to be added to original destination port + */ +void udp_remap_to_init(in_port_t port, in_port_t delta) +{ + udp_port_delta_to_init[port] = delta; + udp_port_delta_from_init[port + delta] = USHRT_MAX - delta; +} + +/** * udp_update_check4() - Update checksum with variable parts from stored one * @buf: L2 packet buffer with final IPv4 header */ @@ -506,7 +536,6 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref, struct msghdr *mh = &udp_splice_mmh_recv[0].msg_hdr; struct sockaddr_storage *sa_s = mh->msg_name; in_port_t src, dst = ref.udp.port, send_dst; - char ns_fn_stack[NS_FN_STACK_SIZE]; int s, v6 = ref.udp.v6, n, i; if (!(events & EPOLLIN)) @@ -529,16 +558,14 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref, switch (ref.udp.splice) { case UDP_TO_NS: + src += udp_port_delta_from_init[src]; + if (!(s = udp_splice_map[v6][src].ns_conn_sock)) { struct udp_splice_connect_ns_arg arg = { c, v6, ref.s, src, dst, -1, }; - clone(udp_splice_connect_ns, - ns_fn_stack + sizeof(ns_fn_stack) / 2, - CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, - (void *)&arg); - + NS_CALL(udp_splice_connect_ns, &arg); if ((s = arg.s) < 0) return; } @@ -551,6 +578,8 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref, send_dst = udp_splice_map[v6][dst].init_dst_port; break; case UDP_TO_INIT: + src += udp_port_delta_from_tap[src]; + if (!(s = udp_splice_map[v6][src].init_conn_sock)) { s = udp_splice_connect(c, v6, ref.s, src, dst, UDP_BACK_TO_NS); @@ -867,16 +896,28 @@ int udp_tap_handler(struct ctx *c, int af, void *addr, .sin6_port = uh->dest, .sin6_addr = *(struct in6_addr *)addr, }; + enum bind_type bind_to = BIND_ANY; sa = (struct sockaddr *)&s_in6; sl = sizeof(s_in6); + if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) && + udp_tap_map[V6][dst].ts_local) { + if (udp_tap_map[V6][dst].loopback) + s_in6.sin6_addr = in6addr_loopback; + else + s_in6.sin6_addr = c->addr6_seen; + } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) { + bind_to = BIND_LL; + } + if (!(s = udp_tap_map[V6][src].sock)) { union udp_epoll_ref uref = { .bound = 1, .v6 = 1, .port = src }; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, src, 0, uref.u32); + s = sock_l4(c, AF_INET6, IPPROTO_UDP, src, bind_to, + uref.u32); if (s <= 0) return count; @@ -885,14 +926,6 @@ int udp_tap_handler(struct ctx *c, int af, void *addr, } udp_tap_map[V6][src].ts = now->tv_sec; - - if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) && - udp_tap_map[V6][dst].ts_local) { - if (udp_tap_map[V6][dst].loopback) - s_in6.sin6_addr = in6addr_loopback; - else - s_in6.sin6_addr = c->addr6_seen; - } } for (i = 0; i < count; i++) { @@ -923,30 +956,25 @@ int udp_sock_init_ns(void *arg) { union udp_epoll_ref uref = { .bound = 1, .splice = UDP_TO_INIT }; struct ctx *c = (struct ctx *)arg; - in_port_t port; + in_port_t dst; ns_enter(c->pasta_pid); - if (c->v4) { - uref.v6 = 0; - for (port = 0; port < USHRT_MAX; port++) { - if (!bitmap_isset(c->udp.port4_to_init, port)) - continue; + for (dst = 0; dst < USHRT_MAX; dst++) { + if (!bitmap_isset(c->udp.port_to_init, dst)) + continue; - uref.port = port; - sock_l4(c, AF_INET, IPPROTO_UDP, port, BIND_LOOPBACK, + uref.port = dst + udp_port_delta_to_init[dst]; + + if (c->v4) { + uref.v6 = 0; + sock_l4(c, AF_INET, IPPROTO_UDP, dst, BIND_LOOPBACK, uref.u32); } - } - - if (c->v6) { - uref.v6 = 1; - for (port = 0; port < USHRT_MAX; port++) { - if (!bitmap_isset(c->udp.port6_to_init, port)) - continue; - uref.port = port; - sock_l4(c, AF_INET6, IPPROTO_UDP, port, BIND_LOOPBACK, + if (c->v6) { + uref.v6 = 1; + sock_l4(c, AF_INET6, IPPROTO_UDP, dst, BIND_LOOPBACK, uref.u32); } } @@ -1016,68 +1044,56 @@ static void udp_splice_iov_init(void) int udp_sock_init(struct ctx *c) { union udp_epoll_ref uref = { .bound = 1 }; - char ns_fn_stack[NS_FN_STACK_SIZE]; - enum bind_type tap_bind; - in_port_t port; + in_port_t dst; int s; - if (c->v4) { - uref.v6 = 0; - for (port = 0; port < USHRT_MAX; port++) { - uref.port = port; + for (dst = 0; dst < USHRT_MAX; dst++) { + if (!bitmap_isset(c->udp.port_to_tap, dst)) + continue; + + uref.port = dst + udp_port_delta_to_tap[dst]; + + if (c->v4) { + uref.splice = 0; + uref.v6 = 0; + s = sock_l4(c, AF_INET, IPPROTO_UDP, dst, + c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY, + uref.u32); + if (s > 0) + udp_tap_map[V4][uref.port].sock = s; - if (bitmap_isset(c->udp.port4_to_ns, port)) { + if (c->mode == MODE_PASTA) { uref.splice = UDP_TO_NS; - sock_l4(c, AF_INET, IPPROTO_UDP, port, + sock_l4(c, AF_INET, IPPROTO_UDP, dst, BIND_LOOPBACK, uref.u32); - tap_bind = BIND_EXT; - } else { - tap_bind = BIND_ANY; - } - - if (bitmap_isset(c->udp.port4_to_tap, port)) { - uref.splice = 0; - s = sock_l4(c, AF_INET, IPPROTO_UDP, port, - tap_bind, uref.u32); - if (s > 0) - udp_tap_map[V4][port].sock = s; } } + if (c->v6) { + uref.splice = 0; + uref.v6 = 1; + s = sock_l4(c, AF_INET6, IPPROTO_UDP, dst, + c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY, + uref.u32); + if (s > 0) + udp_tap_map[V6][uref.port].sock = s; - udp_sock4_iov_init(); - } - - if (c->v6) { - uref.v6 = 1; - for (port = 0; port < USHRT_MAX; port++) { - uref.port = port; - - if (bitmap_isset(c->udp.port6_to_ns, port)) { + if (c->mode == MODE_PASTA) { uref.splice = UDP_TO_NS; - sock_l4(c, AF_INET6, IPPROTO_UDP, port, + sock_l4(c, AF_INET6, IPPROTO_UDP, dst, BIND_LOOPBACK, uref.u32); - tap_bind = BIND_EXT; - } else { - tap_bind = BIND_ANY; - } - - if (bitmap_isset(c->udp.port6_to_tap, port)) { - uref.splice = 0; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, port, - tap_bind, uref.u32); - if (s > 0) - udp_tap_map[V6][port].sock = s; } } + } + if (c->v4) + udp_sock4_iov_init(); + + if (c->v6) udp_sock6_iov_init(); - } if (c->mode == MODE_PASTA) { udp_splice_iov_init(); - clone(udp_sock_init_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, - CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, - (void *)c); + NS_CALL(udp_sock_init_ns, c); } return 0; @@ -11,6 +11,8 @@ int udp_sock_init(struct ctx *c); void udp_timer(struct ctx *c, struct timespec *ts); void udp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, uint32_t *ip_da); +void udp_remap_to_tap(in_port_t port, in_port_t delta); +void udp_remap_to_init(in_port_t port, in_port_t delta); /** * union udp_epoll_ref - epoll reference portion for TCP connections @@ -37,20 +39,13 @@ union udp_epoll_ref { /** * struct udp_ctx - Execution context for UDP - * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap - * @port4_to_init: IPv4 ports bound namespace-side, spliced to init - * @port6_to_init: IPv6 ports bound namespace-side, spliced to init - * @port4_to_ns: IPv4 ports bound init-side, spliced to namespace - * @port6_to_ns: IPv6 ports bound init-side, spliced to namespace + * @port_to_tap: Ports bound host-side, data to tap or ns L4 socket + * @port_to_init: Ports bound namespace-side, data to init L4 socket * @timer_run: Timestamp of most recent timer run */ struct udp_ctx { - uint8_t port4_to_tap [USHRT_MAX / 8]; - uint8_t port6_to_tap [USHRT_MAX / 8]; - uint8_t port4_to_init [USHRT_MAX / 8]; - uint8_t port6_to_init [USHRT_MAX / 8]; - uint8_t port4_to_ns [USHRT_MAX / 8]; - uint8_t port6_to_ns [USHRT_MAX / 8]; + uint8_t port_to_tap [USHRT_MAX / 8]; + uint8_t port_to_init [USHRT_MAX / 8]; struct timespec timer_run; }; @@ -36,7 +36,6 @@ #include "util.h" #include "passt.h" -#ifdef DEBUG #define logfn(name, level) \ void name(const char *format, ...) { \ char ts[sizeof("Mmm dd hh:mm:ss.")]; \ @@ -44,37 +43,33 @@ void name(const char *format, ...) { \ struct tm *tm; \ va_list args; \ \ - clock_gettime(CLOCK_REALTIME, &tp); \ - tm = gmtime(&tp.tv_sec); \ - strftime(ts, sizeof(ts), "%b %d %T.", tm); \ + if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \ + clock_gettime(CLOCK_REALTIME, &tp); \ + tm = gmtime(&tp.tv_sec); \ + strftime(ts, sizeof(ts), "%b %d %T.", tm); \ \ - fprintf(stderr, "%s%04lu: ", ts, tp.tv_nsec / (100 * 1000)); \ - va_start(args, format); \ - vsyslog(level, format, args); \ - va_end(args); \ - va_start(args, format); \ - vfprintf(stderr, format, args); \ - va_end(args); \ - if (format[strlen(format)] != '\n') \ - fprintf(stderr, "\n"); \ -} -#else -#define logfn(name, level) \ -void name(const char *format, ...) { \ - va_list args; \ + fprintf(stderr, "%s%04lu: ", ts, \ + tp.tv_nsec / (100 * 1000)); \ + } \ \ va_start(args, format); \ vsyslog(level, format, args); \ va_end(args); \ + \ + if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \ + setlogmask(0) == LOG_MASK(LOG_EMERG)) { \ + va_start(args, format); \ + vfprintf(stderr, format, args); \ + va_end(args); \ + if (format[strlen(format)] != '\n') \ + fprintf(stderr, "\n"); \ + } \ } -#endif logfn(err, LOG_ERR) logfn(warn, LOG_WARNING) logfn(info, LOG_INFO) -#ifdef DEBUG logfn(debug, LOG_DEBUG) -#endif /** * ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol @@ -171,12 +166,16 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, sa = (const struct sockaddr *)&addr4; sl = sizeof(addr4); } else { - if (bind_addr == BIND_LOOPBACK) + if (bind_addr == BIND_LOOPBACK) { addr6.sin6_addr = in6addr_loopback; - else if (bind_addr == BIND_EXT) + } else if (bind_addr == BIND_EXT) { addr6.sin6_addr = c->addr6; - else + } else if (bind_addr == BIND_LL) { + addr6.sin6_addr = c->addr6_ll; + addr6.sin6_scope_id = if_nametoindex(c->ifn); + } else { addr6.sin6_addr = in6addr_any; + } sa = (const struct sockaddr *)&addr6; sl = sizeof(addr6); @@ -1,12 +1,7 @@ void err(const char *format, ...); void warn(const char *format, ...); void info(const char *format, ...); - -#ifdef DEBUG void debug(const char *format, ...); -#else -#define debug(...) { } -#endif #define CHECK_SET_MIN_MAX(basename, fd) \ do { \ @@ -53,6 +48,14 @@ void debug(const char *format, ...); #define PORT_IS_EPHEMERAL(port) ((port) >= PORT_EPHEMERAL_MIN) #define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 4) +#define NS_CALL(fn, arg) \ + do { \ + char ns_fn_stack[NS_FN_STACK_SIZE]; \ + \ + clone((fn), ns_fn_stack + sizeof(ns_fn_stack) / 2, \ + CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, \ + (void *)(arg)); \ + } while (0) #if __BYTE_ORDER == __BIG_ENDIAN #define L2_BUF_ETH_IP4_INIT \ @@ -120,6 +123,7 @@ void debug(const char *format, ...); enum bind_type { BIND_ANY = 0, BIND_LOOPBACK, + BIND_LL, BIND_EXT, }; |