aboutgitcodebugslistschat
path: root/isolation.c
diff options
context:
space:
mode:
Diffstat (limited to 'isolation.c')
-rw-r--r--isolation.c167
1 files changed, 167 insertions, 0 deletions
diff --git a/isolation.c b/isolation.c
new file mode 100644
index 0000000..41ca888
--- /dev/null
+++ b/isolation.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ * for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ * for network namespace/tap device mode
+ *
+ * isolation.c - Self isolation helpers
+ *
+ * Copyright Red Hat
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <pwd.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <linux/audit.h>
+#include <linux/capability.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+#include "util.h"
+#include "seccomp.h"
+#include "passt.h"
+#include "isolation.h"
+
+/**
+ * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE
+ */
+void drop_caps(void)
+{
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (i == CAP_NET_BIND_SERVICE)
+ continue;
+
+ prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
+ }
+}
+
+/**
+ * drop_root() - Switch to given UID and GID
+ * @uid: User ID to switch to
+ * @gid: Group ID to switch to
+ */
+void drop_root(uid_t uid, gid_t gid)
+{
+ if (setgroups(0, NULL)) {
+ /* If we don't start with CAP_SETGID, this will EPERM */
+ if (errno != EPERM) {
+ err("Can't drop supplementary groups: %s",
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (!setgid(gid) && !setuid(uid))
+ return;
+
+ err("Can't change user/group, exiting");
+ exit(EXIT_FAILURE);
+}
+
+/**
+ * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
+ *
+ * Return: negative error code on failure, zero on success
+ */
+int sandbox(struct ctx *c)
+{
+ int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS;
+
+ if (!c->netns_only) {
+ if (c->pasta_userns_fd == -1)
+ flags |= CLONE_NEWUSER;
+ else
+ setns(c->pasta_userns_fd, CLONE_NEWUSER);
+ }
+
+ c->pasta_userns_fd = -1;
+
+ /* If we run in foreground, we have no chance to actually move to a new
+ * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
+ * ever gets around seccomp profiles -- there's no harm in passing it.
+ */
+ if (!c->foreground || c->mode == MODE_PASST)
+ flags |= CLONE_NEWPID;
+
+ if (unshare(flags)) {
+ perror("unshare");
+ return -errno;
+ }
+
+ if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) {
+ perror("mount /");
+ return -errno;
+ }
+
+ if (mount("", TMPDIR, "tmpfs",
+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY,
+ "nr_inodes=2,nr_blocks=0")) {
+ perror("mount tmpfs");
+ return -errno;
+ }
+
+ if (chdir(TMPDIR)) {
+ perror("chdir");
+ return -errno;
+ }
+
+ if (syscall(SYS_pivot_root, ".", ".")) {
+ perror("pivot_root");
+ return -errno;
+ }
+
+ if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) {
+ perror("umount2");
+ return -errno;
+ }
+
+ drop_caps(); /* Relative to the new user namespace this time. */
+
+ return 0;
+}
+
+/**
+ * seccomp() - Set up seccomp filters depending on mode, won't return on failure
+ * @c: Execution context
+ */
+void seccomp(const struct ctx *c)
+{
+ struct sock_fprog prog;
+
+ if (c->mode == MODE_PASST) {
+ prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
+ prog.filter = filter_passt;
+ } else {
+ prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
+ prog.filter = filter_pasta;
+ }
+
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+ prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+ perror("prctl");
+ exit(EXIT_FAILURE);
+ }
+}