aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2021-10-13 22:25:03 +0200
committerStefano Brivio <sbrivio@redhat.com>2021-10-14 13:15:46 +0200
commit66d5930ec77caed942404ceef4829f2c4ca431bd (patch)
treecef75db6ce37ddd50de819f1dc53dcd602b97c36
parentf318174a9387ecd94d83ed0b9356940c60753846 (diff)
downloadpasst-66d5930ec77caed942404ceef4829f2c4ca431bd.tar
passt-66d5930ec77caed942404ceef4829f2c4ca431bd.tar.gz
passt-66d5930ec77caed942404ceef4829f2c4ca431bd.tar.bz2
passt-66d5930ec77caed942404ceef4829f2c4ca431bd.tar.lz
passt-66d5930ec77caed942404ceef4829f2c4ca431bd.tar.xz
passt-66d5930ec77caed942404ceef4829f2c4ca431bd.tar.zst
passt-66d5930ec77caed942404ceef4829f2c4ca431bd.zip
passt, pasta: Add seccomp support
The list of allowed syscalls comes from comments of the form: #syscalls <list>, for syscalls needed in both passt and pasta mode, and: #syscalls:pasta <list> or #syscalls:passt <list>, for syscalls needed in pasta or passt mode only. seccomp.sh builds a list of BPF statements from those comments, prefixed by a binary search tree to keep lookups fast. While at it, clean up the Makefile a bit by using wildcards. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--Makefile17
-rw-r--r--conf.c2
-rw-r--r--igmp.c1
-rw-r--r--mld.c1
-rw-r--r--passt.c36
-rwxr-xr-xseccomp.sh180
-rw-r--r--tap.c11
-rw-r--r--tcp.c11
-rw-r--r--udp.c7
-rw-r--r--util.c2
10 files changed, 259 insertions, 9 deletions
diff --git a/Makefile b/Makefile
index 9f0e3bf..26b6840 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ CFLAGS += -Wall -Wextra -pedantic
CFLAGS += -DRLIMIT_STACK_VAL=$(shell ulimit -s)
CFLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE)
CFLAGS += -DNETNS_RUN_DIR=\"/run/netns\"
+# Quote the tr operands so that the shell can't glob-expand them against files
+# in the build directory.
+# NOTE(review): assumes the upper-cased "uname -m" output names an existing
+# AUDIT_ARCH_* constant -- confirm for every supported architecture
+CFLAGS += -DPASST_AUDIT_ARCH=AUDIT_ARCH_$(shell uname -m | tr '[:lower:]' '[:upper:]')
prefix ?= /usr/local
@@ -13,14 +14,12 @@ avx2: clean all
static: CFLAGS += -static
static: clean all
-passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
- dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
- netlink.c netlink.h pasta.c pasta.h siphash.c siphash.h tap.c tap.h \
- icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
- $(CC) $(CFLAGS) \
- passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c pasta.c pcap.c \
- ndp.c netlink.c siphash.c tap.c icmp.c tcp.c udp.c util.c \
- -o passt
+# Regenerate the seccomp profiles whenever the annotated sources or the
+# generator script itself change
+seccomp.h: seccomp.sh *.c $(filter-out seccomp.h,$(wildcard *.h))
+ @ ./seccomp.sh
+
+# Build from every source except qrap.c, which has its own target
+passt: $(filter-out qrap.c,$(wildcard *.c)) \
+ $(filter-out qrap.h,$(wildcard *.h)) seccomp.h
+ $(CC) $(CFLAGS) $(filter-out qrap.c,$(wildcard *.c)) -o passt
pasta: passt
ln -s passt pasta
@@ -35,7 +34,7 @@ qrap: qrap.c passt.h
.PHONY: clean
clean:
- -${RM} passt *.o qrap pasta pasta.1 passt4netns \
+ -${RM} passt *.o seccomp.h qrap pasta pasta.1 passt4netns \
passt.tar passt.tar.gz *.deb *.rpm
install: passt pasta qrap
diff --git a/conf.c b/conf.c
index 6399fbb..e3244aa 100644
--- a/conf.c
+++ b/conf.c
@@ -10,6 +10,8 @@
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
+ *
+ * #syscalls stat
*/
#define _GNU_SOURCE
diff --git a/igmp.c b/igmp.c
index 235002d..b92aa0a 100644
--- a/igmp.c
+++ b/igmp.c
@@ -1 +1,2 @@
/* TO BE IMPLEMENTED */
+/* Placeholder definition, presumably so that this file isn't an empty
+ * translation unit; named to avoid the reserved identifier "__" */
+__attribute__((__unused__)) static void igmp_placeholder(void) { }
diff --git a/mld.c b/mld.c
index 235002d..b92aa0a 100644
--- a/mld.c
+++ b/mld.c
@@ -1 +1,2 @@
/* TO BE IMPLEMENTED */
+/* Placeholder definition, presumably so that this file isn't an empty
+ * translation unit; named to avoid the reserved identifier "__" */
+__attribute__((__unused__)) static void mld_placeholder(void) { }
diff --git a/passt.c b/passt.c
index b411657..0628d8c 100644
--- a/passt.c
+++ b/passt.c
@@ -51,7 +51,12 @@
#include <time.h>
#include <syslog.h>
#include <sys/stat.h>
+#include <seccomp.h>
+#include <sys/prctl.h>
+#include <linux/filter.h>
+#include <stddef.h>
+#include "seccomp.h"
#include "util.h"
#include "passt.h"
#include "dhcp.h"
@@ -158,11 +163,40 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
}
/**
+ * seccomp() - Install the seccomp filter matching the operating mode
+ * @c:		Execution context
+ *
+ * Terminates the process, instead of returning, if either prctl() call fails.
+ */
+static void seccomp(struct ctx *c)
+{
+	int passt = c->mode == MODE_PASST;
+	struct sock_fprog prog;
+
+	/* One generated filter array per mode, see seccomp.h */
+	prog.filter = passt ? filter_passt : filter_pasta;
+	prog.len = passt ? (unsigned short)ARRAY_SIZE(filter_passt)
+			 : (unsigned short)ARRAY_SIZE(filter_pasta);
+
+	/* The filter is only installed once no_new_privs was set successfully */
+	if (!prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) &&
+	    !prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
+		return;
+
+	perror("prctl");
+	exit(EXIT_FAILURE);
+}
+
+/**
* main() - Entry point and main loop
* @argc: Argument count
* @argv: Options, plus optional target PID for pasta mode
*
* Return: 0 once interrupted, non-zero on failure
+ *
+ * #syscalls read write open close fork dup2 exit chdir brk ioctl writev syslog
+ * #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen
+ * #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown
+ * #syscalls openat fstat fcntl lseek
+ * #syscalls:pasta rt_sigreturn
*/
int main(int argc, char **argv)
{
@@ -198,6 +232,8 @@ int main(int argc, char **argv)
conf(&c, argc, argv);
+ seccomp(&c);
+
if (!c.debug && (c.stderr || isatty(fileno(stdout))))
openlog(log_name, LOG_PERROR, LOG_DAEMON);
diff --git a/seccomp.sh b/seccomp.sh
new file mode 100755
index 0000000..7e37bd3
--- /dev/null
+++ b/seccomp.sh
@@ -0,0 +1,180 @@
+#!/bin/sh -eu
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
+# PASST - Plug A Simple Socket Transport
+# for qemu/UNIX domain socket mode
+#
+# PASTA - Pack A Subtle Tap Abstraction
+# for network namespace/tap device mode
+#
+# seccomp.sh - Build seccomp profiles from "#syscalls[:PROFILE]" comments in code
+#
+# Copyright (c) 2021 Red Hat GmbH
+# Author: Stefano Brivio <sbrivio@redhat.com>
+
+# Scratch file holding the statements of the profile being built: finish()
+# removes it once a profile is complete, the trap covers early exits (this
+# script runs with -e)
+TMP="$(mktemp)"
+trap 'rm -f "${TMP}"' EXIT
+OUT="seccomp.h"
+
+# ${0} is quoted so that an invocation path containing blanks stays a single
+# operand of basename
+HEADER="/* This file was automatically generated by $(basename "${0}") */"
+
+# Prefix for each profile: check that 'arch' in seccomp_data is matching
+#
+# @PROFILE@ is replaced by the profile name, giving one filter_<profile> array
+# per profile. @KILL@ is the jump offset taken if 'arch' doesn't match
+# PASST_AUDIT_ARCH: gen_profile() passes the number of statements plus one, so
+# that the jump skips the load of 'nr' and every generated statement, landing
+# on the first return statement of POST. If 'arch' matches, execution falls
+# through and the syscall number is loaded for the comparisons that follow.
+PRE='
+struct sock_filter filter_@PROFILE@[] = {
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, arch))),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, PASST_AUDIT_ARCH, 0, @KILL@),
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, nr))),
+
+'
+
+# Suffix for each profile: return actions
+#
+# The order matters: the jump offsets computed in gen_profile() expect the
+# kill action first and the allow action right after it. A syscall number
+# matching no statement falls through to the kill action.
+POST=' BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+};
+'
+
+# Syscall, @NR@: number, @ALLOW@: offset to RET_ALLOW, @NAME@: syscall name
+# On a match, jump @ALLOW@ statements ahead; otherwise (offset 0) fall through
+# to the next statement.
+CALL=' BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, @NR@, @ALLOW@, 0), /* @NAME@ */'
+
+# Binary search tree node or leaf, @NR@: value, @R@: right jump, @L@: left jump
+# Jump @R@ statements ahead if the syscall number is greater than or equal to
+# @NR@, @L@ statements ahead otherwise.
+BST=' BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, @NR@, @R@, @L@),'
+
+# sub() - Overwrite one line of the scratch file with an expanded template
+# $1:	Number of the line to overwrite
+# $2:	Name of the variable holding the template
+# $@:	Placeholder definitions as KEY:value, each replacing one @KEY@
+sub() {
+	IFS=
+	__at="${1}"
+	__tpl="$(eval printf '%s' "\${${2}}")"
+	shift 2
+
+	# First put the raw template in place...
+	sed -i "${__at}s#.*#${__tpl}#" "${TMP}"
+
+	# ...then resolve its placeholders, one definition at a time
+	for __def in ${@}; do
+		sed -i "${__at}s/@${__def%%:*}@/${__def#*:}/" "${TMP}"
+	done
+	unset IFS
+}
+
+# finish() - Append a complete profile to the header: prefix, statements, suffix
+# $1:	Name of the variable holding the prefix template
+# $@:	Placeholder definitions as KEY:value for the prefix template
+finish() {
+	IFS=
+	__head="$(eval printf '%s' "\${${1}}")"
+	shift
+
+	for __def in ${@}; do
+		__head="$(printf '%s' "${__head}" | sed "s#@${__def%%:*}@#${__def#*:}#")"
+	done
+
+	# Expanded prefix first, then the statements collected in the scratch file
+	{
+		printf '%s\n' "${__head}"
+		cat "${TMP}"
+	} >> "${OUT}"
+	rm "${TMP}"
+
+	printf '%s' "${POST}" >> "${OUT}"
+	unset IFS
+}
+
+# log2() - Print the binary logarithm of an integer, rounded down
+# $1:	Operand, -1 is printed if it's not greater than zero
+log2() {
+	__exp=-1
+	__rest=${1}
+
+	# Count how many times the operand can be halved before reaching zero
+	until [ ${__rest} -le 0 ]; do
+		__rest=$((__rest >> 1))
+		__exp=$((__exp + 1))
+	done
+
+	printf '%s\n' "${__exp}"
+}
+
+# gen_profile() - Build struct sock_filter for a single profile
+# $1: Profile name
+# $@: Names of allowed system calls, amount padded to next power of two
+#
+# The statements are assembled in the scratch file, one per line: first the
+# binary search tree statements, then one CALL statement per syscall, sorted by
+# syscall number. finish() then wraps them with the PRE and POST templates and
+# appends the result to the header.
+gen_profile() {
+ __profile="${1}"
+ shift
+
+ # One tree statement for every four calls, minus one; the tree depth is
+ # derived from the same ratio
+ __statements_calls=${#}
+ __bst_levels=$(log2 $(( __statements_calls / 4 )) )
+ __statements_bst=$(( __statements_calls / 4 - 1 ))
+ __statements=$((__statements_calls + __statements_bst))
+
+ # Reserve one placeholder line (-1) per tree statement, add the number of
+ # each syscall as resolved by ausyscall, then sort numerically: the
+ # placeholders end up on the first lines, ahead of the syscall numbers
+ for __i in $(seq 1 ${__statements_bst} ); do
+ echo -1 >> "${TMP}"
+ done
+ for __i in $(seq 1 ${__statements_calls} ); do
+ ausyscall $(eval echo \${${__i}}) --exact >> "${TMP}"
+ done
+ sort -go "${TMP}" "${TMP}"
+
+ # __distance: position, among the sorted calls, of the first comparison
+ # value for the current level; __line: scratch file line of the tree
+ # statement being written; __ll, __lr: left and right jump offsets
+ __distance=$(( __statements_calls / 2 ))
+ __level_nodes=1
+ __ll=0
+ __line=1
+ for __level in $(seq 1 $(( __bst_levels - 1 )) ); do
+ # Nodes: all the levels except for the last one. The number of nodes
+ # doubles and the spacing between comparison values halves at each level
+ __cmp_pos=${__distance}
+
+ for __node in $(seq 1 ${__level_nodes}); do
+ # Compare against the syscall number found __cmp_pos lines after
+ # the tree statements
+ __cmp_line=$(( __statements_bst + __cmp_pos ))
+ __lr=$(( __ll + 1 ))
+ __nr="$(sed -n ${__cmp_line}p "${TMP}")"
+
+ sub ${__line} BST "NR:${__nr}" "L:${__ll}" "R:${__lr}"
+
+ # The left jump of the next node starts from the right jump of this one
+ __ll=${__lr}
+ __line=$(( __line + 1 ))
+ __cmp_pos=$(( __cmp_pos + __distance * 2 ))
+ done
+
+ __distance=$(( __distance / 2 ))
+ __level_nodes=$(( __level_nodes * 2 ))
+ done
+
+ # Leaves: last level of the tree, jumping into the list of calls. The left
+ # jump of the first leaf skips the remaining leaves only
+ __ll=$(( __level_nodes - 1 ))
+ __lr=$(( __ll + __distance - 1 ))
+ __cmp_pos=${__distance}
+
+ for __leaf in $(seq 1 ${__level_nodes}); do
+ __cmp_line=$(( __statements_bst + __cmp_pos ))
+ __nr="$(sed -n ${__cmp_line}p "${TMP}")"
+ sub ${__line} BST "NR:${__nr}" "L:${__ll}" "R:${__lr}"
+
+ __ll=$(( __lr + __distance - 1 ))
+ __lr=$(( __ll + __distance))
+ __line=$(( __line + 1 ))
+ __cmp_pos=$(( __cmp_pos + __distance * 2 ))
+ done
+
+ # Calls: turn each syscall number into a CALL statement. The allow offset
+ # skips the statements left after this one plus the kill action that POST
+ # places before the allow action
+ for __i in $(seq $(( __statements_bst + 1 )) ${__statements}); do
+ __nr="$(sed -n ${__i}p "${TMP}")"
+ __name=$(ausyscall ${__nr})
+ __allow=$(( __statements - __i + 1 ))
+ sub ${__i} CALL "NR:${__nr}" "NAME:${__name}" "ALLOW:${__allow}"
+ done
+
+ # From the architecture check, the kill action is past the load of the
+ # syscall number and all the statements
+ finish PRE "PROFILE:${__profile}" "KILL:$(( __statements + 1))"
+}
+
+# Start a new header, then append one filter array for each profile name found
+# in "#syscalls:PROFILE" comment lines of the C sources and headers
+printf '%s\n' "${HEADER}" > "${OUT}"
+__profiles="$(sed -n 's/[\t ]*\*[\t ]*#syscalls:\([^ ]*\).*/\1/p' *.[ch] | sort -u)"
+for __p in ${__profiles}; do
+ # Syscalls for this profile: the ones from plain "#syscalls" comments plus
+ # the ones from "#syscalls:<profile>" comments, one per line, no duplicates
+ __calls="$(sed -n 's/[\t ]*\*[\t ]*#syscalls\(:'"${__p}"'\|\)[\t ]\{1,\}\(.*\)/\2/p' *.[ch] | tr ' ' '\n' | sort -u)"
+
+ echo "seccomp profile ${__p} allows: ${__calls}" | tr '\n' ' ' | fmt -t
+
+ # Pad here to keep gen_profile() "simple": the list is extended to the
+ # power of two strictly greater than the number of calls found.
+ # NOTE(review): "tuxcall" is presumably picked as a harmless filler --
+ # confirm that allowing it is acceptable
+ __count=0
+ for __c in ${__calls}; do __count=$(( __count + 1 )); done
+ __padded=$(( 1 << (( $(log2 ${__count}) + 1 )) ))
+ for __i in $( seq ${__count} $(( __padded - 1 )) ); do
+ __calls="${__calls} tuxcall"
+ done
+
+ gen_profile "${__p}" ${__calls}
+done
diff --git a/tap.c b/tap.c
index ec2b8b5..d20503d 100644
--- a/tap.c
+++ b/tap.c
@@ -10,6 +10,8 @@
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
+ *
+ * #syscalls recvfrom sendto
*/
#define _GNU_SOURCE
@@ -768,6 +770,8 @@ restart:
/**
* tap_sock_init_unix() - Create and bind AF_UNIX socket, wait for connection
* @c: Execution context
+ *
+ * #syscalls:passt unlink
*/
static void tap_sock_init_unix(struct ctx *c)
{
@@ -819,8 +823,13 @@ static void tap_sock_init_unix(struct ctx *c)
}
info("UNIX domain socket bound at %s\n", addr.sun_path);
+#ifdef PASST_LEGACY_NO_OPTIONS
+ /*
+ * syscalls:passt chmod
+ */
chmod(addr.sun_path,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
+#endif
pcap_init(c, i);
@@ -850,6 +859,8 @@ static int tun_ns_fd = -1;
* @c: Execution context
*
* Return: 0
+ *
+ * #syscalls:pasta ioctl
*/
static int tap_ns_tun(void *arg)
{
diff --git a/tcp.c b/tcp.c
index b49f49e..05382f7 100644
--- a/tcp.c
+++ b/tcp.c
@@ -303,6 +303,8 @@
* - SPLICE_FIN_FROM: FIN (EPOLLRDHUP) seen from originating socket
* - SPLICE_FIN_TO: FIN (EPOLLRDHUP) seen from connected socket
* - SPLICE_FIN_BOTH: FIN (EPOLLRDHUP) seen from both sides
+ *
+ * #syscalls pipe pipe2
*/
#define _GNU_SOURCE
@@ -2078,6 +2080,9 @@ static void tcp_sock_consume(struct tcp_tap_conn *conn, uint32_t ack_seq)
* @now: Current timestamp
*
* Return: negative on connection reset, 0 otherwise
+ *
+ * #syscalls recvmsg
+ * #syscalls:passt sendmmsg sendmsg
*/
static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn,
struct timespec *now)
@@ -2320,6 +2325,8 @@ out:
* @msg: Array of messages from tap
* @count: Count of messages
* @now: Current timestamp
+ *
+ * #syscalls sendmsg
*/
static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
struct tap_l4_msg *msg, int count,
@@ -2965,6 +2972,8 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref,
* @c: Execution context
* @ref: epoll reference
* @events: epoll events bitmap
+ *
+ * #syscalls splice
*/
void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
uint32_t events)
@@ -3525,6 +3534,8 @@ static int tcp_sock_refill(void *arg)
* @c: Execution context
*
* Return: 0 on success, -1 on failure
+ *
+ * #syscalls getrandom
*/
int tcp_sock_init(struct ctx *c, struct timespec *now)
{
diff --git a/udp.c b/udp.c
index 0837cba..e77345f 100644
--- a/udp.c
+++ b/udp.c
@@ -419,6 +419,8 @@ static void udp_sock6_iov_init(void)
* @splice: UDP_BACK_TO_INIT from init, UDP_BACK_TO_NS from namespace
*
* Return: connected socket, negative error code on failure
+ *
+ * #syscalls:pasta getsockname
*/
int udp_splice_connect(struct ctx *c, int v6, int bound_sock,
in_port_t src, in_port_t dst, int splice)
@@ -640,6 +642,9 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
* @ref: epoll reference
* @events: epoll events bitmap
* @now: Current timestamp
+ *
+ * #syscalls recvmmsg
+ * #syscalls:passt sendmmsg sendmsg
*/
void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
struct timespec *now)
@@ -877,6 +882,8 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
* @now: Current timestamp
*
* Return: count of consumed packets
+ *
+ * #syscalls sendmmsg
*/
int udp_tap_handler(struct ctx *c, int af, void *addr,
struct tap_l4_msg *msg, int count, struct timespec *now)
diff --git a/util.c b/util.c
index 4945431..e0fa5ff 100644
--- a/util.c
+++ b/util.c
@@ -332,6 +332,8 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
* @c: Execution context
*
* Return: 0 on success, -1 on failure
+ *
+ * #syscalls:pasta setns
*/
int ns_enter(struct ctx *c)
{