// SPDX-License-Identifier: AGPL-3.0-or-later /* PASST - Plug A Simple Socket Transport * for qemu/UNIX domain socket mode * * PASTA - Pack A Subtle Tap Abstraction * for network namespace/tap device mode * * pasta.c - pasta (namespace) specific implementations * * Copyright (c) 2020-2021 Red Hat GmbH * Author: Stefano Brivio * * #syscalls:pasta clone waitid exit exit_group rt_sigprocmask * #syscalls:pasta rt_sigreturn|sigreturn armv6l:sigreturn armv7l:sigreturn * #syscalls:pasta ppc64:sigreturn s390x:sigreturn */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "passt.h" #include "isolation.h" #include "netlink.h" /* PID of child, in case we created a namespace */ static int pasta_child_pid; /** * pasta_child_handler() - Exit once shell exits (if we started it), reap clones * @signal: Unused, handler deals with SIGCHLD only */ void pasta_child_handler(int signal) { siginfo_t infop; (void)signal; if (signal != SIGCHLD) return; if (pasta_child_pid && !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) { if (infop.si_pid == pasta_child_pid) exit(EXIT_SUCCESS); /* Nothing to do, detached PID namespace going away */ } waitid(P_ALL, 0, NULL, WEXITED | WNOHANG); waitid(P_ALL, 0, NULL, WEXITED | WNOHANG); } /** * pasta_wait_for_ns() - Busy loop until we can enter the target namespace * @arg: Execution context * * Return: 0 */ static int pasta_wait_for_ns(void *arg) { struct ctx *c = (struct ctx *)arg; int flags = O_RDONLY | O_CLOEXEC; char ns[PATH_MAX]; snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid); do while ((c->pasta_netns_fd = open(ns, flags)) < 0); while (setns(c->pasta_netns_fd, CLONE_NEWNET) && !close(c->pasta_netns_fd)); return 0; } /** * ns_check() - Check if we can enter configured namespaces * @arg: Execution context * * Return: 0 */ static int ns_check(void *arg) { struct ctx *c = (struct ctx *)arg; if (setns(c->pasta_netns_fd, CLONE_NEWNET)) c->pasta_netns_fd = -1; return 0; } /** * pasta_open_ns() - Open network namespace descriptors * @c: Execution context * @netns: network namespace path * * Return: 0 on success, negative error code otherwise */ void pasta_open_ns(struct ctx *c, const char *netns) { int nfd = -1; nfd = open(netns, O_RDONLY | O_CLOEXEC); if (nfd < 0) { err("Couldn't open network namespace %s", netns); exit(EXIT_FAILURE); } c->pasta_netns_fd = nfd; NS_CALL(ns_check, c); if (c->pasta_netns_fd < 0) { err("Couldn't switch to pasta namespaces"); exit(EXIT_FAILURE); } if (!c->no_netns_quit) { char buf[PATH_MAX] = { 0 }; strncpy(buf, netns, PATH_MAX - 1); strncpy(c->netns_base, basename(buf), PATH_MAX - 1); strncpy(buf, netns, PATH_MAX - 1); strncpy(c->netns_dir, dirname(buf), PATH_MAX - 1); } } /** * struct pasta_setup_ns_arg - Argument for pasta_setup_ns() * @argv: Command and arguments to run */ struct pasta_setup_ns_arg { char **argv; }; /** * pasta_setup_ns() - Map credentials, enable access to ping sockets, run shell * @arg: See @pasta_setup_ns_arg * * Return: this function never returns */ static int pasta_setup_ns(void *arg) { struct pasta_setup_ns_arg *a = (struct pasta_setup_ns_arg *)arg; FWRITE("/proc/sys/net/ipv4/ping_group_range", "0 0", "Cannot set ping_group_range, ICMP requests might fail"); execvp(a->argv[0], a->argv); perror("execvp"); exit(EXIT_FAILURE); } /** * pasta_start_ns() - Fork command in new namespace if target ns is not given * @c: Execution context * @argc: Number of arguments for spawned command * @argv: Command to spawn and arguments */ void pasta_start_ns(struct ctx *c, int argc, char *argv[]) { struct pasta_setup_ns_arg arg = { .argv = argv, }; char *shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh"; char *sh_argv[] = { shell, NULL }; char *bash_argv[] = { shell, "-l", NULL }; char ns_fn_stack[NS_FN_STACK_SIZE]; c->foreground = 1; if (!c->debug) c->quiet = 1; if (argc == 0) { if (strstr(shell, "/bash")) { arg.argv = bash_argv; } else { arg.argv = sh_argv; } } pasta_child_pid = clone(pasta_setup_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWUTS, (void *)&arg); if (pasta_child_pid == -1) { perror("clone"); exit(EXIT_FAILURE); } drop_caps(); if (pasta_child_pid) { NS_CALL(pasta_wait_for_ns, c); return; } } /** * pasta_ns_conf() - Set up loopback and tap interfaces in namespace as needed * @c: Execution context */ void pasta_ns_conf(struct ctx *c) { nl_link(1, 1 /* lo */, MAC_ZERO, 1, 0); if (c->pasta_conf_ns) { int prefix_len; nl_link(1, c->pasta_ifi, c->mac_guest, 1, c->mtu); if (c->ifi4) { prefix_len = __builtin_popcount(c->ip4.mask); nl_addr(1, c->pasta_ifi, AF_INET, &c->ip4.addr, &prefix_len, NULL); nl_route(1, c->pasta_ifi, AF_INET, &c->ip4.gw); } if (c->ifi6) { prefix_len = 64; nl_addr(1, c->pasta_ifi, AF_INET6, &c->ip6.addr, &prefix_len, NULL); nl_route(1, c->pasta_ifi, AF_INET6, &c->ip6.gw); } } else { nl_link(1, c->pasta_ifi, c->mac_guest, 0, 0); } proto_update_l2_buf(c->mac_guest, NULL, NULL); } /** * pasta_netns_quit_init() - Watch network namespace to quit once it's gone * @c: Execution context * * Return: inotify file descriptor, -1 on failure or if not needed/applicable */ int pasta_netns_quit_init(struct ctx *c) { int flags = O_NONBLOCK | O_CLOEXEC; struct epoll_event ev = { .events = EPOLLIN }; int inotify_fd; if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base) return -1; if ((inotify_fd = inotify_init1(flags)) < 0) { perror("inotify_init(): won't quit once netns is gone"); return -1; } if (inotify_add_watch(inotify_fd, c->netns_dir, IN_DELETE) < 0) { perror("inotify_add_watch(): won't quit once netns is gone"); return -1; } ev.data.fd = inotify_fd; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, inotify_fd, &ev); return inotify_fd; } /** * pasta_netns_quit_handler() - Handle ns directory events, exit if ns is gone * @c: Execution context * @inotify_fd: inotify file descriptor with watch on namespace directory */ void pasta_netns_quit_handler(struct ctx *c, int inotify_fd) { char buf[sizeof(struct inotify_event) + NAME_MAX + 1]; struct inotify_event *in_ev = (struct inotify_event *)buf; if (read(inotify_fd, buf, sizeof(buf)) < (ssize_t)sizeof(*in_ev)) return; if (strncmp(in_ev->name, c->netns_base, sizeof(c->netns_base))) return; info("Namespace %s is gone, exiting", c->netns_base); exit(EXIT_SUCCESS); }