aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2022-10-14 15:25:35 +1100
committerStefano Brivio <sbrivio@redhat.com>2022-10-15 02:10:36 +0200
commitfb449b16bd8ccf8b751dfa5aee54cb7de0525706 (patch)
treed980c9a46b9cf94ee0320e8d6aa0c2b3ec378c23
parentc22ebccba8a09065e59d080ac7a2969a267974ce (diff)
downloadpasst-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.tar
passt-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.tar.gz
passt-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.tar.bz2
passt-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.tar.lz
passt-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.tar.xz
passt-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.tar.zst
passt-fb449b16bd8ccf8b751dfa5aee54cb7de0525706.zip
isolation: Prevent any child processes gaining capabilities
We drop our own capabilities, but it's possible that processes we exec() could gain extra privilege via file capabilities. It shouldn't be possible for us to exec() anyway due to seccomp() and our filesystem isolation. But just in case, zero the bounding and inheritable capability sets to prevent any such child from gaining privilege. Note that we do this *after* spawning the pasta shell/command (if any), because we do want the user to be able to give that privilege if they want. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--isolation.c56
1 files changed, 56 insertions, 0 deletions
diff --git a/isolation.c b/isolation.c
index 7b8d5f6..3a4ec9f 100644
--- a/isolation.c
+++ b/isolation.c
@@ -122,6 +122,61 @@ static void drop_caps_ep_except(uint64_t keep)
}
/**
+ * clamp_caps() - Prevent any children from gaining caps
+ *
+ * This drops all capabilities from both the inheritable and the
+ * bounding set. This means that any exec()ed processes can't gain
+ * capabilities, even if they have file capabilities which would grant
+ * them. We shouldn't ever exec() in any case, but this provides an
+ * additional layer of protection. Executing this requires
+ * CAP_SETPCAP, which we will have within our userns.
+ *
+ * Note that dropping capabilities from the bounding set limits
+ * exec()ed processes, but does not remove them from the effective or
+ * permitted sets, so it doesn't reduce our own capabilities.
+ */
+static void clamp_caps(void)
+{
+ struct __user_cap_data_struct data[CAP_WORDS];
+ struct __user_cap_header_struct hdr = {
+ .version = CAP_VERSION,
+ .pid = 0, /* 0 selects the calling thread, see capget(2) */
+ };
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ /* Some errors can be ignored:
+ * - EINVAL, we'll get this for all values in 0..63
+ * that are not actually allocated caps
+ * - EPERM, we'll get this if we don't have
+ * CAP_SETPCAP, which can happen if using
+ * --netns-only. We don't need CAP_SETPCAP for
+ * normal operation, so carry on without it.
+ */
+ if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) &&
+ errno != EINVAL && errno != EPERM) {
+ err("Couldn't drop cap %i from bounding set: %s",
+ i, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (syscall(SYS_capget, &hdr, data)) {
+ err("Couldn't get current capabilities: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < CAP_WORDS; i++)
+ data[i].inheritable = 0; /* other sets are written back as read */
+
+ if (syscall(SYS_capset, &hdr, data)) {
+ err("Couldn't drop inheritable capabilities: %s",
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
+
+/**
* isolate_initial() - Early, config independent self isolation
*
* Should:
@@ -324,6 +379,7 @@ int isolate_prefork(struct ctx *c)
ns_caps |= BIT(CAP_NET_BIND_SERVICE);
}
+ clamp_caps();
drop_caps_ep_except(ns_caps);
return 0;