diff options
Diffstat (limited to 'passt.c')
-rw-r--r-- | passt.c | 191 |
1 files changed, 73 insertions, 118 deletions
@@ -35,9 +35,7 @@ #include <syslog.h> #include <sys/prctl.h> #include <netinet/if_ether.h> -#ifdef HAS_GETRANDOM -#include <sys/random.h> -#endif +#include <libgen.h> #include "util.h" #include "passt.h" @@ -51,6 +49,8 @@ #include "arch.h" #include "log.h" #include "tcp_splice.h" +#include "ndp.h" +#include "vu_common.h" #define EPOLL_EVENTS 8 @@ -65,9 +65,9 @@ char *epoll_type_str[] = { [EPOLL_TYPE_TCP_SPLICE] = "connected spliced TCP socket", [EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket", [EPOLL_TYPE_TCP_TIMER] = "TCP timer", - [EPOLL_TYPE_UDP] = "UDP socket", - [EPOLL_TYPE_ICMP] = "ICMP socket", - [EPOLL_TYPE_ICMPV6] = "ICMPv6 socket", + [EPOLL_TYPE_UDP_LISTEN] = "listening UDP socket", + [EPOLL_TYPE_UDP_REPLY] = "UDP reply socket", + [EPOLL_TYPE_PING] = "ICMP/ICMPv6 ping socket", [EPOLL_TYPE_NSQUIT_INOTIFY] = "namespace inotify watch", [EPOLL_TYPE_NSQUIT_TIMER] = "namespace timer watch", [EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device", @@ -86,7 +86,7 @@ static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES, */ static void post_handler(struct ctx *c, const struct timespec *now) { -#define CALL_PROTO_HANDLER(c, now, lc, uc) \ +#define CALL_PROTO_HANDLER(lc, uc) \ do { \ extern void \ lc ## _defer_handler (struct ctx *c) \ @@ -105,49 +105,30 @@ static void post_handler(struct ctx *c, const struct timespec *now) } while (0) /* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */ - CALL_PROTO_HANDLER(c, now, tcp, TCP); + CALL_PROTO_HANDLER(tcp, TCP); /* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */ - CALL_PROTO_HANDLER(c, now, udp, UDP); - /* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */ - CALL_PROTO_HANDLER(c, now, icmp, ICMP); + CALL_PROTO_HANDLER(udp, UDP); flow_defer_handler(c, now); #undef CALL_PROTO_HANDLER + + ndp_timer(c, now); } /** - * secret_init() - Create secret value for SipHash calculations + * random_init() - Initialise things based on random data * @c: Execution context */ -static void secret_init(struct ctx *c) +static void random_init(struct ctx *c) { -#ifndef HAS_GETRANDOM - int dev_random = open("/dev/random", O_RDONLY); - unsigned int random_read = 0; - - while (dev_random && random_read < sizeof(c->hash_secret)) { - int ret = read(dev_random, - (uint8_t *)&c->hash_secret + random_read, - sizeof(c->hash_secret) - random_read); + unsigned int seed; - if (ret == -1 && errno == EINTR) - continue; - - if (ret <= 0) - break; + /* Create secret value for SipHash calculations */ + raw_random(&c->hash_secret, sizeof(c->hash_secret)); - random_read += ret; - } - if (dev_random >= 0) - close(dev_random); - if (random_read < sizeof(c->hash_secret)) { -#else - if (getrandom(&c->hash_secret, sizeof(c->hash_secret), - GRND_RANDOM) < 0) { -#endif /* !HAS_GETRANDOM */ - perror("TCP initial sequence getrandom"); - exit(EXIT_FAILURE); - } + /* Seed pseudo-RNG for things that need non-cryptographic random */ + raw_random(&seed, sizeof(seed)); + srandom(seed); } /** @@ -167,7 +148,7 @@ static void timer_init(struct ctx *c, const struct timespec *now) */ void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { - tcp_buf_update_l2(eth_d, eth_s); + tcp_update_l2_buf(eth_d, eth_s); udp_update_l2_buf(eth_d, eth_s); } @@ -195,28 +176,30 @@ void exit_handler(int signal) * Return: non-zero on failure * * #syscalls read write writev - * #syscalls socket bind connect getsockopt setsockopt s390x:socketcall close - * #syscalls recvfrom sendto shutdown - * #syscalls armv6l:recv armv7l:recv ppc64le:recv - * #syscalls armv6l:send armv7l:send ppc64le:send + * #syscalls socket getsockopt setsockopt s390x:socketcall i686:socketcall close + * #syscalls bind connect recvfrom sendto shutdown + * #syscalls arm:recv ppc64le:recv arm:send ppc64le:send * #syscalls accept4|accept listen epoll_ctl epoll_wait|epoll_pwait epoll_pwait - * #syscalls clock_gettime armv6l:clock_gettime64 armv7l:clock_gettime64 + * #syscalls clock_gettime arm:clock_gettime64 i686:clock_gettime64 */ int main(int argc, char **argv) { - int nfds, i, devnull_fd = -1, pidfile_fd = -1; struct epoll_event events[EPOLL_EVENTS]; - char *log_name, argv0[PATH_MAX], *name; + int nfds, i, devnull_fd = -1; + char argv0[PATH_MAX], *name; struct ctx c = { 0 }; struct rlimit limit; struct timespec now; struct sigaction sa; + if (clock_gettime(CLOCK_MONOTONIC, &log_start)) + die_perror("Failed to get CLOCK_MONOTONIC time"); + arch_avx2_exec(argv); - isolate_initial(); + isolate_initial(argc, argv); - c.pasta_netns_fd = c.fd_tap = c.fd_tap_listen = -1; + c.pasta_netns_fd = c.fd_tap = c.pidfile_fd = -1; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; @@ -231,70 +214,52 @@ int main(int argc, char **argv) name = basename(argv0); if (strstr(name, "pasta")) { sa.sa_handler = pasta_child_handler; - if (sigaction(SIGCHLD, &sa, NULL)) { - die("Couldn't install signal handlers: %s", - strerror(errno)); - } + if (sigaction(SIGCHLD, &sa, NULL)) + die_perror("Couldn't install signal handlers"); - if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { - die("Couldn't set disposition for SIGPIPE: %s", - strerror(errno)); - } + if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) + die_perror("Couldn't set disposition for SIGPIPE"); c.mode = MODE_PASTA; - log_name = "pasta"; } else if (strstr(name, "passt")) { c.mode = MODE_PASST; - log_name = "passt"; } else { exit(EXIT_FAILURE); } madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE); - __openlog(log_name, 0, LOG_DAEMON); - c.epollfd = epoll_create1(EPOLL_CLOEXEC); - if (c.epollfd == -1) { - perror("epoll_create1"); - exit(EXIT_FAILURE); - } + if (c.epollfd == -1) + die_perror("Failed to create epoll file descriptor"); + + if (getrlimit(RLIMIT_NOFILE, &limit)) + die_perror("Failed to get maximum value of open files limit"); - if (getrlimit(RLIMIT_NOFILE, &limit)) { - perror("getrlimit"); - exit(EXIT_FAILURE); - } c.nofile = limit.rlim_cur = limit.rlim_max; - if (setrlimit(RLIMIT_NOFILE, &limit)) { - perror("setrlimit"); - exit(EXIT_FAILURE); - } + if (setrlimit(RLIMIT_NOFILE, &limit)) + die_perror("Failed to set current limit for open files"); + sock_probe_mem(&c); conf(&c, argc, argv); trace_init(c.trace); - if (c.force_stderr || isatty(fileno(stdout))) - __openlog(log_name, LOG_PERROR, LOG_DAEMON); - pasta_netns_quit_init(&c); - tap_sock_init(&c); - vu_init(&c); + tap_backend_init(&c); - secret_init(&c); + random_init(&c); - clock_gettime(CLOCK_MONOTONIC, &now); + if (clock_gettime(CLOCK_MONOTONIC, &now)) + die_perror("Failed to get CLOCK_MONOTONIC time"); flow_init(); if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c))) exit(EXIT_FAILURE); - if (!c.no_icmp) - icmp_init(); - - proto_update_l2_buf(c.mac_guest, c.mac); + proto_update_l2_buf(c.guest_mac, c.our_tap_mac); if (c.ifi4 && !c.no_dhcp) dhcp_init(); @@ -305,46 +270,39 @@ int main(int argc, char **argv) pcap_init(&c); if (!c.foreground) { - if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0) { - perror("/dev/null open"); - exit(EXIT_FAILURE); - } - } - - if (*c.pid_file) { - if ((pidfile_fd = open(c.pid_file, - O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC, - S_IRUSR | S_IWUSR)) < 0) { - perror("PID file open"); - exit(EXIT_FAILURE); - } + if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0) + die_perror("Failed to open /dev/null"); } if (isolate_prefork(&c)) die("Failed to sandbox process, exiting"); - if (!c.foreground) - __daemon(pidfile_fd, devnull_fd); - else - write_pidfile(pidfile_fd, getpid()); + if (!c.foreground) { + __daemon(c.pidfile_fd, devnull_fd); + log_stderr = false; + } else { + pidfile_write(c.pidfile_fd, getpid()); + } - if (pasta_child_pid) + if (pasta_child_pid) { kill(pasta_child_pid, SIGUSR1); + log_stderr = false; + } isolate_postfork(&c); timer_init(&c, &now); loop: - /* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */ + /* NOLINTBEGIN(bugprone-branch-clone): intervals can be the same */ /* cppcheck-suppress [duplicateValueTernary, unmatchedSuppression] */ nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL); - if (nfds == -1 && errno != EINTR) { - perror("epoll_wait"); - exit(EXIT_FAILURE); - } + /* NOLINTEND(bugprone-branch-clone) */ + if (nfds == -1 && errno != EINTR) + die_perror("epoll_wait() failed in main loop"); - clock_gettime(CLOCK_MONOTONIC, &now); + if (clock_gettime(CLOCK_MONOTONIC, &now)) + err_perror("Failed to get CLOCK_MONOTONIC time"); for (i = 0; i < nfds; i++) { union epoll_ref ref = *((union epoll_ref *)&events[i].data.u64); @@ -382,23 +340,20 @@ loop: case EPOLL_TYPE_TCP_TIMER: tcp_timer_handler(&c, ref); break; - case EPOLL_TYPE_UDP: - if (c.mode == MODE_VU) - udp_vu_sock_handler(&c, ref, eventmask, &now); - else - udp_buf_sock_handler(&c, ref, eventmask, &now); + case EPOLL_TYPE_UDP_LISTEN: + udp_listen_sock_handler(&c, ref, eventmask, &now); break; - case EPOLL_TYPE_ICMP: - icmp_sock_handler(&c, AF_INET, ref); + case EPOLL_TYPE_UDP_REPLY: + udp_reply_sock_handler(&c, ref, eventmask, &now); break; - case EPOLL_TYPE_ICMPV6: - icmp_sock_handler(&c, AF_INET6, ref); + case EPOLL_TYPE_PING: + icmp_sock_handler(&c, ref); break; case EPOLL_TYPE_VHOST_CMD: - tap_handler_vu(&c, eventmask); + vu_control_handler(c.vdev, c.fd_tap, eventmask); break; case EPOLL_TYPE_VHOST_KICK: - vu_kick_cb(&c, ref); + vu_kick_cb(c.vdev, ref, &now); break; default: /* Can't happen */ |