diff options
Diffstat (limited to 'pasta.c')
-rw-r--r-- | pasta.c | 253 |
1 files changed, 253 insertions, 0 deletions
@@ -0,0 +1,253 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * pasta.c - pasta (namespace) specific implementations + * + * Copyright (c) 2020-2021 Red Hat GmbH + * Author: Stefano Brivio <sbrivio@redhat.com> + * + * #syscalls:pasta clone unshare waitid kill execve exit_group rt_sigprocmask + * #syscalls:pasta geteuid getdents64 readlink setsid + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <syslog.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/wait.h> +#include <signal.h> +#include <dirent.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <net/ethernet.h> +#include <sys/syscall.h> + +#include "util.h" +#include "passt.h" +#include "netlink.h" + +/* PID of child, in case we created a namespace, and its procfs link */ +static int pasta_child_pid; +static char pasta_child_ns[PATH_MAX]; + +/** + * pasta_ns_cleanup() - Look for processes in namespace, terminate them + */ +static void pasta_ns_cleanup(void) +{ + char proc_path[PATH_MAX], ns_link[PATH_MAX], buf[BUFSIZ]; + int recheck = 0, found = 0, waited = 0; + int dir_fd, n; + + if (!*pasta_child_ns) + return; + +loop: + if ((dir_fd = open("/proc", O_RDONLY | O_DIRECTORY)) < 0) + return; + + while ((n = syscall(SYS_getdents64, dir_fd, buf, BUFSIZ)) > 0) { + struct dirent *dp = (struct dirent *)buf; + int pos = 0; + + while (pos < n) { + pid_t pid; + + errno = 0; + pid = strtol(dp->d_name, NULL, 0); + if (!pid || errno) + goto next; + + snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid); + if (readlink(proc_path, ns_link, PATH_MAX) < 0) + goto next; + + if (!strncmp(ns_link, pasta_child_ns, PATH_MAX)) { + found = 1; + if (waited) + kill(pid, SIGKILL); + else + kill(pid, SIGQUIT); + } +next: + dp = (struct dirent *)(buf + (pos += dp->d_reclen)); + } + } + + close(dir_fd); + + if (!found) + return; + + if (waited) { + if (recheck) { + info("Some processes in namespace didn't quit"); + } else { + found = 0; + recheck = 1; + goto loop; + } + return; + } + + info("Waiting for all processes in namespace to terminate"); + sleep(1); + waited = 1; + goto loop; +} + +/** + * pasta_child_handler() - Exit once shell exits (if we started it), reap clones + * @signal: Unused, handler deals with SIGCHLD only + */ +void pasta_child_handler(int signal) +{ + siginfo_t infop; + + (void)signal; + + if (pasta_child_pid && + !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) { + if (infop.si_pid == pasta_child_pid) { + pasta_ns_cleanup(); + exit(EXIT_SUCCESS); + } + } + + waitid(P_ALL, 0, NULL, WEXITED | WNOHANG); + waitid(P_ALL, 0, NULL, WEXITED | WNOHANG); +} + +/** + * pasta_wait_for_ns() - Busy loop until we can enter the target namespace + * @arg: Execution context + * + * Return: 0 + */ +static int pasta_wait_for_ns(void *arg) +{ + struct ctx *c = (struct ctx *)arg; + char ns[PATH_MAX]; + + if (c->netns_only) + goto netns; + + snprintf(ns, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid); + do + while ((c->pasta_userns_fd = open(ns, O_RDONLY)) < 0); + while (setns(c->pasta_userns_fd, 0) && !close(c->pasta_userns_fd)); + +netns: + snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid); + do + while ((c->pasta_netns_fd = open(ns, O_RDONLY)) < 0); + while (setns(c->pasta_netns_fd, 0) && !close(c->pasta_netns_fd)); + + return 0; +} + +/** + * pasta_start_ns() - Fork shell in new namespace if target ns is not given + * @c: Execution context + */ +void pasta_start_ns(struct ctx *c) +{ + char buf[BUFSIZ], *shell, proc_path[PATH_MAX]; + int euid = geteuid(); + int fd; + + c->foreground = 1; + if (!c->debug) + c->quiet = 1; + + if ((pasta_child_pid = fork()) == -1) { + perror("fork"); + exit(EXIT_FAILURE); + } + + if (pasta_child_pid) { + NS_CALL(pasta_wait_for_ns, c); + + snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", + pasta_child_pid); + readlink(proc_path, pasta_child_ns, PATH_MAX); + + return; + } + + if (unshare(CLONE_NEWNET | (c->netns_only ? 0 : CLONE_NEWUSER))) { + perror("unshare"); + exit(EXIT_FAILURE); + } + + if (!c->netns_only) { + snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1); + + fd = open("/proc/self/uid_map", O_WRONLY); + write(fd, buf, strlen(buf)); + close(fd); + + fd = open("/proc/self/setgroups", O_WRONLY); + write(fd, "deny", sizeof("deny")); + close(fd); + + fd = open("/proc/self/gid_map", O_WRONLY); + write(fd, buf, strlen(buf)); + close(fd); + } + + fd = open("/proc/sys/net/ipv4/ping_group_range", O_WRONLY); + write(fd, "0 0", strlen("0 0")); + close(fd); + + shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh"; + if (strstr(shell, "/bash")) + execve(shell, ((char *[]) { shell, "-l", NULL }), environ); + else + execve(shell, ((char *[]) { shell, NULL }), environ); + + perror("execve"); + exit(EXIT_FAILURE); +} + +/** + * pasta_ns_conf() - Set up loopback and tap interfaces in namespace as needed + * @c: Execution context + */ +void pasta_ns_conf(struct ctx *c) +{ + nl_link(1, 1 /* lo */, MAC_ZERO, 1); + + if (c->pasta_conf_ns) { + nl_link(1, c->pasta_ifi, c->mac_guest, 1); + + if (c->v4) { + nl_addr(1, c->pasta_ifi, AF_INET, &c->addr4, + __builtin_popcount(c->mask4), NULL); + nl_route(1, c->pasta_ifi, AF_INET, &c->gw4); + } + + if (c->v6) { + nl_addr(1, c->pasta_ifi, AF_INET6, &c->addr6, 64, NULL); + nl_route(1, c->pasta_ifi, AF_INET6, &c->gw6); + } + } else { + nl_link(1, c->pasta_ifi, c->mac_guest, 0); + } + + proto_update_l2_buf(c->mac_guest, NULL, NULL); +} |