// SPDX-License-Identifier: AGPL-3.0-or-later /* PASST - Plug A Simple Socket Transport * for qemu/UNIX domain socket mode * * PASTA - Pack A Subtle Tap Abstraction * for network namespace/tap device mode * * passt.c - Daemon implementation * * Copyright (c) 2020-2021 Red Hat GmbH * Author: Stefano Brivio * * Grab Ethernet frames from AF_UNIX socket (in "passt" mode) or tap device (in * "pasta" mode), build SOCK_DGRAM/SOCK_STREAM sockets for each 5-tuple from * TCP, UDP packets, perform connection tracking and forward them. Forward * packets received on sockets back to the UNIX domain socket (typically, a * socket virtio_net file descriptor from qemu) or to the tap device (typically, * created in a separate network namespace). */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "passt.h" #include "dhcpv6.h" #include "icmp.h" #include "tcp.h" #include "udp.h" #include "pcap.h" #include "tap.h" #include "conf.h" #define EPOLL_EVENTS 8 #define __TIMER_INTERVAL MIN(TCP_TIMER_INTERVAL, UDP_TIMER_INTERVAL) #define TIMER_INTERVAL MIN(__TIMER_INTERVAL, ICMP_TIMER_INTERVAL) char pkt_buf[PKT_BUF_BYTES] __attribute__ ((aligned(PAGE_SIZE))); char *ip_proto_str[IPPROTO_SCTP + 1] = { [IPPROTO_ICMP] = "ICMP", [IPPROTO_TCP] = "TCP", [IPPROTO_UDP] = "UDP", [IPPROTO_ICMPV6] = "ICMPV6", [IPPROTO_SCTP] = "SCTP", }; /** * sock_handler() - Event handler for L4 sockets * @c: Execution context * @ref: epoll reference * @events: epoll events * @now: Current timestamp */ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, struct timespec *now) { debug("%s: %s packet from socket %i (events: 0x%08x)", c->mode == MODE_PASST ? "passt" : "pasta", IP_PROTO_STR(ref.proto), ref.s, events); if (!c->no_tcp && ref.proto == IPPROTO_TCP) tcp_sock_handler( c, ref, events, now); else if (!c->no_udp && ref.proto == IPPROTO_UDP) udp_sock_handler( c, ref, events, now); else if (!c->no_icmp && (ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6)) icmp_sock_handler(c, ref, events, now); } /** * timer_handler() - Run periodic tasks for L4 protocol handlers * @c: Execution context * @now: Current timestamp */ static void timer_handler(struct ctx *c, struct timespec *now) { if (!c->no_tcp && timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) { tcp_timer(c, now); c->tcp.timer_run = *now; } if (!c->no_udp && timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) { udp_timer(c, now); c->udp.timer_run = *now; } if (!c->no_icmp && timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) { icmp_timer(c, now); c->icmp.timer_run = *now; } } /** * timer_init() - Set initial timestamp for timer runs to current time * @c: Execution context * @now: Current timestamp */ static void timer_init(struct ctx *c, struct timespec *now) { c->tcp.timer_run = c->udp.timer_run = c->icmp.timer_run = *now; } /** * proto_update_l2_buf() - Update scatter-gather L2 buffers in protocol handlers * @eth_d: Ethernet destination address, NULL if unchanged * @eth_s: Ethernet source address, NULL if unchanged * @ip_da: Pointer to IPv4 destination address, NULL if unchanged */ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, uint32_t *ip_da) { tcp_update_l2_buf(eth_d, eth_s, ip_da); udp_update_l2_buf(eth_d, eth_s, ip_da); } static int pasta_child_pid; static char pasta_child_ns[PATH_MAX]; /** * pasta_ns_cleanup() - Look for processes in namespace, terminate them */ static void pasta_ns_cleanup(void) { char proc_path[PATH_MAX], ns_link[PATH_MAX]; int recheck = 0, found = 0, waited = 0; struct dirent *dp; DIR *dirp; if (!*pasta_child_ns) return; loop: if (!(dirp = opendir("/proc"))) return; while ((dp = readdir(dirp))) { pid_t pid; errno = 0; pid = strtol(dp->d_name, NULL, 0); if (!pid || errno) continue; snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid); if (readlink(proc_path, ns_link, PATH_MAX) < 0) continue; if (!strncmp(ns_link, pasta_child_ns, PATH_MAX)) { found = 1; if (waited) kill(pid, SIGKILL); else kill(pid, SIGQUIT); } } closedir(dirp); if (!found) return; if (waited) { if (recheck) { info("Some processes in namespace didn't quit"); } else { found = 0; recheck = 1; goto loop; } return; } info("Waiting for all processes in namespace to terminate"); sleep(1); waited = 1; goto loop; } /** * pasta_child_handler() - Exit once shell spawned by pasta_start_ns() exits * @signal: Unused, handler deals with SIGCHLD only */ static void pasta_child_handler(int signal) { siginfo_t infop; (void)signal; if (pasta_child_pid && !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) { if (infop.si_pid == pasta_child_pid) { pasta_ns_cleanup(); exit(EXIT_SUCCESS); } } waitid(P_ALL, 0, NULL, WEXITED | WNOHANG); } /** * pasta_wait_for_ns() - Busy loop until we can enter the target namespace * @target_pid: PID of process associated with target namespace * * Return: 0 */ static int pasta_wait_for_ns(void *target_pid) { while (ns_enter(*(int *)target_pid)); return 0; } /** * pasta_start_ns() - Fork shell in new namespace if target PID is not given * @c: Execution context */ static void pasta_start_ns(struct ctx *c) { char buf[BUFSIZ], *shell; int euid = geteuid(); int fd; c->foreground = 1; if (!c->debug) c->quiet = 1; if ((c->pasta_pid = fork()) == -1) { perror("fork"); exit(EXIT_FAILURE); } if ((pasta_child_pid = c->pasta_pid)) { NS_CALL(pasta_wait_for_ns, &pasta_child_pid); return; } if (unshare(CLONE_NEWNET | CLONE_NEWUSER)) { perror("unshare"); exit(EXIT_FAILURE); } snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1); fd = open("/proc/self/uid_map", O_WRONLY); write(fd, buf, strlen(buf)); close(fd); fd = open("/proc/self/setgroups", O_WRONLY); write(fd, "deny", sizeof("deny")); close(fd); fd = open("/proc/self/gid_map", O_WRONLY); write(fd, buf, strlen(buf)); close(fd); fd = open("/proc/sys/net/ipv4/ping_group_range", O_WRONLY); write(fd, "0 0", strlen("0 0")); close(fd); shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh"; if (strstr(shell, "/bash")) execve(shell, ((char *[]) { shell, "-l", NULL }), environ); else execve(shell, ((char *[]) { shell, NULL }), environ); perror("execve"); exit(EXIT_FAILURE); } /** * main() - Entry point and main loop * @argc: Argument count * @argv: Options, plus optional target PID for pasta mode * * Return: 0 once interrupted, non-zero on failure */ int main(int argc, char **argv) { struct epoll_event events[EPOLL_EVENTS]; struct ctx c = { 0 }; struct rlimit limit; struct timespec now; char *log_name; int nfds, i; if (strstr(argv[0], "pasta") || strstr(argv[0], "passt4netns")) { struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sa.sa_handler = pasta_child_handler; sigaction(SIGCHLD, &sa, NULL); signal(SIGPIPE, SIG_IGN); c.mode = MODE_PASTA; log_name = "pasta"; } else { c.mode = MODE_PASST; log_name = "passt"; } if (madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE)) perror("madvise"); openlog(log_name, 0, LOG_DAEMON); setlogmask(LOG_MASK(LOG_EMERG)); conf(&c, argc, argv); if (!c.debug && (c.stderr || isatty(fileno(stdout)))) openlog(log_name, LOG_PERROR, LOG_DAEMON); if (c.mode == MODE_PASTA && !c.pasta_pid) { char proc_path[PATH_MAX]; pasta_start_ns(&c); snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", c.pasta_pid); readlink(proc_path, pasta_child_ns, PATH_MAX); } c.epollfd = epoll_create1(0); if (c.epollfd == -1) { perror("epoll_create1"); exit(EXIT_FAILURE); } if (getrlimit(RLIMIT_NOFILE, &limit)) { perror("getrlimit"); exit(EXIT_FAILURE); } limit.rlim_cur = limit.rlim_max; if (setrlimit(RLIMIT_NOFILE, &limit)) { perror("setrlimit"); exit(EXIT_FAILURE); } proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4); tap_sock_init(&c); clock_gettime(CLOCK_MONOTONIC, &now); if ((!c.no_udp && udp_sock_init(&c, &now)) || (!c.no_tcp && tcp_sock_init(&c, &now))) exit(EXIT_FAILURE); if (c.v6 && !c.no_dhcpv6) dhcpv6_init(&c); if (c.debug) setlogmask(LOG_UPTO(LOG_DEBUG)); else if (c.quiet) setlogmask(LOG_UPTO(LOG_ERR)); else setlogmask(LOG_UPTO(LOG_INFO)); if (isatty(fileno(stdout)) && !c.foreground) daemon(0, 0); timer_init(&c, &now); loop: nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL); if (nfds == -1 && errno != EINTR) { perror("epoll_wait"); exit(EXIT_FAILURE); } clock_gettime(CLOCK_MONOTONIC, &now); for (i = 0; i < nfds; i++) { union epoll_ref ref = *((union epoll_ref *)&events[i].data.u64); if (events[i].data.fd == c.fd_tap) tap_handler(&c, events[i].events, &now); else sock_handler(&c, ref, events[i].events, &now); } timer_handler(&c, &now); goto loop; return 0; }