aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--conf.c10
-rw-r--r--dhcpv6.c4
-rw-r--r--passt.c16
-rw-r--r--pasta.c253
-rw-r--r--util.c129
-rw-r--r--util.h5
6 files changed, 389 insertions, 28 deletions
diff --git a/conf.c b/conf.c
index e3244aa..d0394a4 100644
--- a/conf.c
+++ b/conf.c
@@ -275,12 +275,11 @@ overlap:
*/
static void get_dns(struct ctx *c)
{
- int dns4_set, dns6_set, dnss_set, dns_set;
+ int dns4_set, dns6_set, dnss_set, dns_set, fd;
struct in6_addr *dns6 = &c->dns6[0];
struct fqdn *s = c->dns_search;
uint32_t *dns4 = &c->dns4[0];
char buf[BUFSIZ], *p, *end;
- FILE *r;
dns4_set = !c->v4 || !!*dns4;
dns6_set = !c->v6 || !IN6_IS_ADDR_UNSPECIFIED(dns6);
@@ -290,11 +289,10 @@ static void get_dns(struct ctx *c)
if (dns_set && dnss_set)
return;
- r = fopen("/etc/resolv.conf", "r");
- if (!r)
+ if ((fd = open("/etc/resolv.conf", O_RDONLY)) < 0)
goto out;
- while (fgets(buf, BUFSIZ, r)) {
+ while (line_read(buf, BUFSIZ, fd)) {
if (!dns_set && strstr(buf, "nameserver ") == buf) {
p = strrchr(buf, ' ');
if (!p)
@@ -333,7 +331,7 @@ static void get_dns(struct ctx *c)
}
}
- fclose(r);
+ close(fd);
out:
if (!dns_set && dns4 == c->dns4 && dns6 == c->dns6)
diff --git a/dhcpv6.c b/dhcpv6.c
index 19c7a62..d514e8d 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -588,10 +588,10 @@ int dhcpv6(struct ctx *c, struct ethhdr *eh, size_t len)
*/
void dhcpv6_init(struct ctx *c)
{
- struct tm y2k = { 0, 0, 0, 1, 0, 100, 0, 0, 0, 0, NULL };
+ time_t y2k = 946684800; /* Epoch to 2000-01-01T00:00:00Z, no mktime() */
uint32_t duid_time;
- duid_time = htonl(difftime(time(NULL), mktime(&y2k)));
+ duid_time = htonl(difftime(time(NULL), y2k));
resp.server_id.duid_time = duid_time;
resp_not_on_link.server_id.duid_time = duid_time;
diff --git a/passt.c b/passt.c
index 0628d8c..e8f4e62 100644
--- a/passt.c
+++ b/passt.c
@@ -192,10 +192,10 @@ static void seccomp(struct ctx *c)
*
* Return: 0 once interrupted, non-zero on failure
*
- * #syscalls read write open close fork dup2 exit chdir brk ioctl writev syslog
+ * #syscalls read write open close fork dup2 exit chdir ioctl writev syslog
* #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen
* #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown
- * #syscalls openat fstat fcntl lseek
+ * #syscalls openat fstat fcntl lseek clone setsid exit_group
* #syscalls:pasta rt_sigreturn
*/
int main(int argc, char **argv)
@@ -226,16 +226,16 @@ int main(int argc, char **argv)
if (madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE))
perror("madvise");
- openlog(log_name, 0, LOG_DAEMON);
+ __openlog(log_name, 0, LOG_DAEMON);
- setlogmask(LOG_MASK(LOG_EMERG));
+ __setlogmask(LOG_MASK(LOG_EMERG));
conf(&c, argc, argv);
seccomp(&c);
if (!c.debug && (c.stderr || isatty(fileno(stdout))))
- openlog(log_name, LOG_PERROR, LOG_DAEMON);
+ __openlog(log_name, LOG_PERROR, LOG_DAEMON);
c.epollfd = epoll_create1(0);
if (c.epollfd == -1) {
@@ -271,11 +271,11 @@ int main(int argc, char **argv)
dhcpv6_init(&c);
if (c.debug)
- setlogmask(LOG_UPTO(LOG_DEBUG));
+ __setlogmask(LOG_UPTO(LOG_DEBUG));
else if (c.quiet)
- setlogmask(LOG_UPTO(LOG_ERR));
+ __setlogmask(LOG_UPTO(LOG_ERR));
else
- setlogmask(LOG_UPTO(LOG_INFO));
+ __setlogmask(LOG_UPTO(LOG_INFO));
if (isatty(fileno(stdout)) && !c.foreground)
daemon(0, 0);
diff --git a/pasta.c b/pasta.c
new file mode 100644
index 0000000..7c53c13
--- /dev/null
+++ b/pasta.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ * for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ * for network namespace/tap device mode
+ *
+ * pasta.c - pasta (namespace) specific implementations
+ *
+ * Copyright (c) 2020-2021 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ *
+ * #syscalls:pasta clone unshare waitid kill execve exit_group rt_sigprocmask
+ * #syscalls:pasta geteuid getdents64 readlink setsid
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <dirent.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <net/ethernet.h>
+#include <sys/syscall.h>
+
+#include "util.h"
+#include "passt.h"
+#include "netlink.h"
+
+/* PID of child, in case we created a namespace, and its procfs link */
+static int pasta_child_pid;
+static char pasta_child_ns[PATH_MAX];
+
+/**
+ * pasta_ns_cleanup() - Look for processes in namespace, terminate them
+ *
+ * Walks the numeric entries of /proc and compares each /proc/<pid>/ns/net link
+ * with pasta_child_ns. Matching processes get SIGQUIT on the first pass; after
+ * a one-second sleep a second pass sends SIGKILL, and a last pass (which sends
+ * SIGKILL again to anything still found) only logs if processes are left.
+ * Does nothing if pasta_child_ns is empty or /proc can't be opened.
+ */
+static void pasta_ns_cleanup(void)
+{
+	char proc_path[PATH_MAX], ns_link[PATH_MAX], buf[BUFSIZ];
+	int recheck = 0, found = 0, waited = 0;
+	int dir_fd, n;
+
+	if (!*pasta_child_ns)
+		return;
+
+loop:
+	if ((dir_fd = open("/proc", O_RDONLY | O_DIRECTORY)) < 0)
+		return;
+
+	/* Raw getdents64() instead of readdir(): no stream, no heap */
+	while ((n = syscall(SYS_getdents64, dir_fd, buf, BUFSIZ)) > 0) {
+		struct dirent *dp = (struct dirent *)buf;
+		int pos = 0;
+
+		while (pos < n) {
+			ssize_t link_len;
+			pid_t pid;
+
+			/* Skip entries that aren't a non-zero number */
+			errno = 0;
+			pid = strtol(dp->d_name, NULL, 0);
+			if (!pid || errno)
+				goto next;
+
+			snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid);
+			link_len = readlink(proc_path, ns_link, PATH_MAX - 1);
+			if (link_len < 0)
+				goto next;
+
+			/* readlink() doesn't NUL-terminate: do it here, or the
+			 * comparison below would run into stale bytes left
+			 * over from the stack or from a previous entry.
+			 */
+			ns_link[link_len] = 0;
+
+			if (!strncmp(ns_link, pasta_child_ns, PATH_MAX)) {
+				found = 1;
+				if (waited)
+					kill(pid, SIGKILL);
+				else
+					kill(pid, SIGQUIT);
+			}
+next:
+			dp = (struct dirent *)(buf + (pos += dp->d_reclen));
+		}
+	}
+
+	close(dir_fd);
+
+	if (!found)
+		return;
+
+	if (waited) {
+		if (recheck) {
+			info("Some processes in namespace didn't quit");
+		} else {
+			/* SIGKILL sent: scan once more to see what's left */
+			found = 0;
+			recheck = 1;
+			goto loop;
+		}
+		return;
+	}
+
+	info("Waiting for all processes in namespace to terminate");
+	sleep(1);
+	waited = 1;
+	goto loop;
+}
+
+/**
+ * pasta_child_handler() - Exit once shell exits (if we started it), reap clones
+ * @signal:	Unused, handler deals with SIGCHLD only
+ *
+ * If pasta_child_pid is set and that child has exited, clean up the namespace
+ * and exit successfully. Otherwise, reap up to two other exited children
+ * without blocking.
+ *
+ * NOTE(review): exit() and the snprintf()/logging calls reached through
+ * pasta_ns_cleanup() are not on the POSIX async-signal-safe list -- confirm
+ * this is acceptable for how the handler is installed.
+ */
+void pasta_child_handler(int signal)
+{
+	siginfo_t infop;
+
+	(void)signal;
+
+	/* With WNOHANG, waitid() can return 0 without any child having changed
+	 * state, and the content of infop is then implementation-dependent:
+	 * zero si_pid first so that the comparison below is reliable.
+	 */
+	infop.si_pid = 0;
+
+	if (pasta_child_pid &&
+	    !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
+		if (infop.si_pid == pasta_child_pid) {
+			pasta_ns_cleanup();
+			exit(EXIT_SUCCESS);
+		}
+	}
+
+	waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
+	waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
+}
+
+/**
+ * pasta_wait_for_ns() - Spin until the child's namespaces can be joined
+ * @arg:	Execution context
+ *
+ * Keeps opening /proc/<child>/ns/user (skipped if @netns_only is set) and then
+ * /proc/<child>/ns/net until open() succeeds, and tries to setns() into each.
+ * A failed setns() closes the descriptor and starts over; the descriptors that
+ * worked are left in the context.
+ *
+ * Return: 0
+ */
+static int pasta_wait_for_ns(void *arg)
+{
+	struct ctx *c = (struct ctx *)arg;
+	char path[PATH_MAX];
+
+	if (!c->netns_only) {
+		snprintf(path, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid);
+
+		for (;;) {
+			do
+				c->pasta_userns_fd = open(path, O_RDONLY);
+			while (c->pasta_userns_fd < 0);
+
+			if (!setns(c->pasta_userns_fd, 0))
+				break;
+
+			/* Retry only if the descriptor could be closed */
+			if (close(c->pasta_userns_fd))
+				break;
+		}
+	}
+
+	snprintf(path, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);
+
+	for (;;) {
+		do
+			c->pasta_netns_fd = open(path, O_RDONLY);
+		while (c->pasta_netns_fd < 0);
+
+		if (!setns(c->pasta_netns_fd, 0))
+			break;
+
+		/* Retry only if the descriptor could be closed */
+		if (close(c->pasta_netns_fd))
+			break;
+	}
+
+	return 0;
+}
+
+/**
+ * pasta_write_file() - Best-effort write of a short buffer to an existing file
+ * @path:	File to open, write-only
+ * @data:	Bytes to write
+ * @len:	Number of bytes to write
+ *
+ * Failures are ignored, but write() and close() are skipped if open() fails.
+ */
+static void pasta_write_file(const char *path, const char *data, size_t len)
+{
+	int fd = open(path, O_WRONLY);
+
+	if (fd < 0)
+		return;
+
+	write(fd, data, len);
+	close(fd);
+}
+
+/**
+ * pasta_start_ns() - Fork shell in new namespace if target ns is not given
+ * @c:		Execution context
+ *
+ * Forces foreground mode, and quiet mode unless debugging. The parent waits
+ * until it can join the child's namespaces, records the child's network
+ * namespace link in pasta_child_ns and returns. The child detaches into a new
+ * network (and, unless @netns_only, user) namespace, sets up ID mappings and
+ * ping_group_range, then replaces itself with $SHELL (or /bin/sh): it never
+ * returns.
+ */
+void pasta_start_ns(struct ctx *c)
+{
+	char buf[BUFSIZ], *shell, proc_path[PATH_MAX];
+	int euid = geteuid();
+	ssize_t n;
+
+	c->foreground = 1;
+	if (!c->debug)
+		c->quiet = 1;
+
+	if ((pasta_child_pid = fork()) == -1) {
+		perror("fork");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pasta_child_pid) {
+		NS_CALL(pasta_wait_for_ns, c);
+
+		snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net",
+			 pasta_child_pid);
+
+		/* readlink() doesn't NUL-terminate, and might fill the whole
+		 * buffer: leave room and terminate explicitly. On failure, the
+		 * name stays empty, which disables pasta_ns_cleanup().
+		 */
+		n = readlink(proc_path, pasta_child_ns, PATH_MAX - 1);
+		pasta_child_ns[n < 0 ? 0 : n] = 0;
+
+		return;
+	}
+
+	if (unshare(CLONE_NEWNET | (c->netns_only ? 0 : CLONE_NEWUSER))) {
+		perror("unshare");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!c->netns_only) {
+		/* Map ID 0 inside the namespace to our effective UID outside.
+		 * NOTE(review): the same string, built from the effective UID,
+		 * is also written to gid_map -- confirm that's intended for
+		 * users whose GID differs from their UID.
+		 */
+		snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1);
+
+		pasta_write_file("/proc/self/uid_map", buf, strlen(buf));
+		pasta_write_file("/proc/self/setgroups", "deny",
+				 sizeof("deny"));
+		pasta_write_file("/proc/self/gid_map", buf, strlen(buf));
+	}
+
+	pasta_write_file("/proc/sys/net/ipv4/ping_group_range", "0 0",
+			 strlen("0 0"));
+
+	shell = getenv("SHELL");
+	if (!shell)
+		shell = "/bin/sh";
+
+	/* Anything looking like bash is started as a login shell */
+	if (strstr(shell, "/bash"))
+		execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
+	else
+		execve(shell, ((char *[]) { shell, NULL }), environ);
+
+	perror("execve");
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * pasta_ns_conf() - Bring up loopback, configure tap interface in namespace
+ * @c:		Execution context
+ *
+ * The loopback interface is always brought up. The tap interface gets the
+ * guest MAC address; it's brought up, with addresses and routes for the
+ * enabled IP versions, only if @pasta_conf_ns is set. Layer-2 buffers are then
+ * updated with the guest MAC address.
+ */
+void pasta_ns_conf(struct ctx *c)
+{
+	int configure = !!c->pasta_conf_ns;
+
+	nl_link(1, 1 /* lo */, MAC_ZERO, 1);
+
+	/* Set the MAC address in any case, link up only if we configure */
+	nl_link(1, c->pasta_ifi, c->mac_guest, configure);
+
+	if (configure && c->v4) {
+		int prefix_len = __builtin_popcount(c->mask4);
+
+		nl_addr(1, c->pasta_ifi, AF_INET, &c->addr4, prefix_len, NULL);
+		nl_route(1, c->pasta_ifi, AF_INET, &c->gw4);
+	}
+
+	if (configure && c->v6) {
+		nl_addr(1, c->pasta_ifi, AF_INET6, &c->addr6, 64, NULL);
+		nl_route(1, c->pasta_ifi, AF_INET6, &c->gw6);
+	}
+
+	proto_update_l2_buf(c->mac_guest, NULL, NULL);
+}
diff --git a/util.c b/util.c
index e0fa5ff..a3b3eb8 100644
--- a/util.c
+++ b/util.c
@@ -37,24 +37,27 @@
#include "util.h"
#include "passt.h"
+/* For __openlog() and __setlogmask() wrappers, and __vsyslog() (replacement) */
+static int log_mask;
+static int log_sock = -1;
+static char log_ident[BUFSIZ];
+static int log_opt;
+static time_t log_debug_start;
+
#define logfn(name, level) \
void name(const char *format, ...) { \
- char ts[sizeof("Mmm dd hh:mm:ss.")]; \
struct timespec tp; \
- struct tm *tm; \
va_list args; \
\
if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \
clock_gettime(CLOCK_REALTIME, &tp); \
- tm = gmtime(&tp.tv_sec); \
- strftime(ts, sizeof(ts), "%b %d %T.", tm); \
- \
- fprintf(stderr, "%s%04lu: ", ts, \
+ fprintf(stderr, "%lu.%04lu: ", \
+ tp.tv_sec - log_debug_start, \
tp.tv_nsec / (100 * 1000)); \
} \
\
va_start(args, format); \
- vsyslog(level, format, args); \
+ __vsyslog(level, format, args); \
va_end(args); \
\
if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \
@@ -73,6 +76,79 @@ logfn(info, LOG_INFO)
logfn(debug, LOG_DEBUG)
/**
+ * __openlog() - openlog() wrapper keeping state for the custom __vsyslog()
+ * @ident:	openlog() identity (program name)
+ * @option:	openlog() options
+ * @facility:	openlog() facility (LOG_DAEMON)
+ *
+ * Records the current time as start for debug timestamps and, if not done
+ * yet, connects a datagram UNIX socket to the system logger. If that fails,
+ * return without storing identity and options, and without calling openlog().
+ */
+void __openlog(const char *ident, int option, int facility)
+{
+	struct timespec now;
+
+	clock_gettime(CLOCK_REALTIME, &now);
+	log_debug_start = now.tv_sec;
+
+	if (log_sock < 0) {
+		struct sockaddr_un addr = { .sun_family = AF_UNIX, };
+		const struct sockaddr *sa = (const struct sockaddr *)&addr;
+
+		log_sock = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+		if (log_sock < 0)
+			return;
+
+		strncpy(addr.sun_path, _PATH_LOG, sizeof(addr.sun_path));
+		if (connect(log_sock, sa, sizeof(addr)) != 0) {
+			close(log_sock);
+			log_sock = -1;
+			return;
+		}
+	}
+
+	log_opt = option;
+	strncpy(log_ident, ident, sizeof(log_ident) - 1);
+	log_mask |= facility;
+
+	openlog(ident, option, facility);
+}
+
+/**
+ * __setlogmask() - Set log priority mask for libc and for custom __vsyslog()
+ * @mask:	Same as setlogmask() mask
+ */
+void __setlogmask(int mask)
+{
+	/* Keep the libc mask in sync: logging helpers query it directly */
+	setlogmask(mask);
+	log_mask = mask;
+}
+
+/**
+ * __vsyslog() - vsyslog() implementation not using heap memory
+ * @pri:	Facility and level map, same as priority for vsyslog()
+ * @format:	Same as vsyslog() format
+ * @ap:		Same as vsyslog() ap
+ *
+ * Messages whose level isn't enabled in the current mask are dropped. The
+ * message is built in a stack buffer of BUFSIZ bytes and truncated to fit,
+ * a trailing newline is added if missing, then it's printed to standard error
+ * (only if LOG_PERROR was passed to __openlog()) and sent to the log socket.
+ */
+void __vsyslog(int pri, const char *format, va_list ap)
+{
+	char buf[BUFSIZ];
+	int prefix_len, n;
+
+	if (!(LOG_MASK(LOG_PRI(pri)) & log_mask))
+		return;
+
+	/* Send without name and timestamp, the system logger should add them */
+	prefix_len = snprintf(buf, sizeof(buf), "<%i> ", pri);
+	if (prefix_len < 0 || prefix_len >= (int)sizeof(buf))
+		return;
+
+	n = vsnprintf(buf + prefix_len, sizeof(buf) - prefix_len, format, ap);
+	if (n < 0)
+		return;
+	n += prefix_len;
+
+	/* vsnprintf() returns the length the full message would have had: clamp
+	 * it to what's actually in the buffer, leaving room for '\n' and NUL.
+	 */
+	if (n > (int)sizeof(buf) - 2)
+		n = sizeof(buf) - 2;
+
+	/* Check the last character of the message, not its terminator */
+	if (n == prefix_len || buf[n - 1] != '\n')
+		buf[n++] = '\n';
+	buf[n] = 0;
+
+	/* Skip the "<pri> " prefix, and never use the message as a format */
+	if (log_opt & LOG_PERROR)
+		fprintf(stderr, "%s", buf + prefix_len);
+
+	if (log_sock >= 0)
+		send(log_sock, buf, n, 0);
+}
+
+/**
* ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol
* @ip6h: IPv6 header
* @proto: Filled with L4 protocol number
@@ -292,6 +368,35 @@ int bitmap_isset(uint8_t *map, int bit)
}
/**
+ * line_read() - Similar to fgets(), without using heap, a file instead of a stream
+ * @buf:	Read buffer
+ * @len:	Size of @buf: at most @len - 1 bytes are read
+ * @fd:		File descriptor for reading, it needs to support lseek()
+ *
+ * Reads a chunk from the current file offset, cuts it at the first newline
+ * (which is replaced by a terminator, unlike fgets()) and moves the file
+ * offset back to the start of the next line. If the chunk has no newline, it
+ * is returned as it is.
+ *
+ * Return: @buf if a line is found, NULL on EOF or error
+ */
+char *line_read(char *buf, size_t len, int fd)
+{
+	ssize_t n;
+	char *p;
+
+	if (!buf || !len)
+		return NULL;
+
+	n = read(fd, buf, len - 1);
+	if (n <= 0)
+		return NULL;
+
+	/* Terminate right after the data we got: on a short read, whatever
+	 * follows in the buffer is stale and must not be searched.
+	 */
+	buf[n] = 0;
+
+	p = strchr(buf, '\n');
+	if (!p)
+		return buf;
+
+	*p = 0;
+
+	/* Give back what we read past the newline, for empty lines too,
+	 * otherwise the rest of this chunk would be lost.
+	 */
+	lseek(fd, (off_t)(p - buf + 1) - n, SEEK_CUR);
+
+	return buf;
+}
+
+/**
* procfs_scan_listen() - Set bits for listening TCP or UDP sockets from procfs
* @name: Corresponding name of file under /proc/net/
* @map: Bitmap where numbers of ports in listening state will be set
@@ -302,14 +407,14 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
char line[200], path[PATH_MAX];
unsigned long port;
unsigned int state;
- FILE *fp;
+ int fd;
snprintf(path, PATH_MAX, "/proc/net/%s", name);
- if (!(fp = fopen(path, "r")))
+ if ((fd = open(path, O_RDONLY)) < 0)
return;
- fgets(line, sizeof(line), fp);
- while (fgets(line, sizeof(line), fp)) {
+ line_read(line, sizeof(line), fd);
+ while (line_read(line, sizeof(line), fd)) {
if (sscanf(line, "%*u: %*x:%lx %*x:%*x %x", &port, &state) != 2)
continue;
@@ -324,7 +429,7 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
bitmap_set(map, port);
}
- fclose(fp);
+ close(fd);
}
/**
diff --git a/util.h b/util.h
index 605b708..9edf041 100644
--- a/util.h
+++ b/util.h
@@ -133,6 +133,7 @@ enum {
#include <net/if.h>
#include <linux/ip.h>
#include <limits.h>
+#include <stdarg.h>
enum bind_type {
BIND_ANY = 0,
@@ -143,6 +144,9 @@ enum bind_type {
struct ctx;
+void __openlog(const char *ident, int option, int facility);
+void __vsyslog(int pri, const char *fmt, va_list ap);
+void __setlogmask(int mask);
char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto);
int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
enum bind_type bind_addr, uint32_t data);
@@ -151,5 +155,6 @@ int timespec_diff_ms(struct timespec *a, struct timespec *b);
void bitmap_set(uint8_t *map, int bit);
void bitmap_clear(uint8_t *map, int bit);
int bitmap_isset(uint8_t *map, int bit);
+char *line_read(char *buf, size_t len, int fd);
void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude);
int ns_enter(struct ctx *c);