diff options
Diffstat (limited to 'util.c')
-rw-r--r-- | util.c | 294 |
1 files changed, 198 insertions, 96 deletions
@@ -28,11 +28,15 @@ #include <linux/errqueue.h> #include <getopt.h> +#include "linux_dep.h" #include "util.h" #include "iov.h" #include "passt.h" #include "packet.h" #include "log.h" +#ifdef HAS_GETRANDOM +#include <sys/random.h> +#endif /** * sock_l4_sa() - Create and bind socket to socket address, add to epoll list @@ -52,6 +56,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, { sa_family_t af = ((const struct sockaddr *)sa)->sa_family; union epoll_ref ref = { .type = type, .data = data }; + bool freebind = false; struct epoll_event ev; int fd, y = 1, ret; uint8_t proto; @@ -61,8 +66,11 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, case EPOLL_TYPE_TCP_LISTEN: proto = IPPROTO_TCP; socktype = SOCK_STREAM | SOCK_NONBLOCK; + freebind = c->freebind; break; case EPOLL_TYPE_UDP_LISTEN: + freebind = c->freebind; + /* fallthrough */ case EPOLL_TYPE_UDP_REPLY: proto = IPPROTO_UDP; socktype = SOCK_DGRAM | SOCK_NONBLOCK; @@ -82,7 +90,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, ret = -errno; if (fd < 0) { - warn("L4 socket: %s", strerror(-ret)); + warn("L4 socket: %s", strerror_(-ret)); return ret; } @@ -127,6 +135,18 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, } } + if (freebind) { + int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6; + int opt = af == AF_INET ? IP_FREEBIND : IPV6_FREEBIND; + + if (setsockopt(fd, level, opt, &y, sizeof(y))) { + err_perror("Failed to set %s on socket %i", + af == AF_INET ? "IP_FREEBIND" + : "IPV6_FREEBIND", + fd); + } + } + if (bind(fd, sa, sl) < 0) { /* We'll fail to bind to low ports if we don't have enough * capabilities, and we'll fail to bind on already bound ports, @@ -142,7 +162,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) { ret = -errno; - warn("TCP socket listen: %s", strerror(-ret)); + warn("TCP socket listen: %s", strerror_(-ret)); close(fd); return ret; } @@ -151,65 +171,12 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, ev.data.u64 = ref.u64; if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { ret = -errno; - warn("L4 epoll_ctl: %s", strerror(-ret)); + warn("L4 epoll_ctl: %s", strerror_(-ret)); return ret; } return fd; } -/** - * sock_l4() - Create and bind socket for given L4, add to epoll list - * @c: Execution context - * @af: Address family, AF_INET or AF_INET6 - * @type: epoll type - * @bind_addr: Address for binding, NULL for any - * @ifname: Interface for binding, NULL for any - * @port: Port, host order - * @data: epoll reference portion for protocol handlers - * - * Return: newly created socket, negative error code on failure - */ -int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type, - const void *bind_addr, const char *ifname, uint16_t port, - uint32_t data) -{ - switch (af) { - case AF_INET: { - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(port), - { 0 }, { 0 }, - }; - if (bind_addr) - addr4.sin_addr = *(struct in_addr *)bind_addr; - return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname, - false, data); - } - - case AF_UNSPEC: - if (!DUAL_STACK_SOCKETS || bind_addr) - return -EINVAL; - /* fallthrough */ - case AF_INET6: { - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(port), - 0, IN6ADDR_ANY_INIT, 0, - }; - if (bind_addr) { - addr6.sin6_addr = *(struct in6_addr *)bind_addr; - - if (!memcmp(bind_addr, &c->ip6.addr_ll, - sizeof(c->ip6.addr_ll))) - addr6.sin6_scope_id = c->ifi6; - } - return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname, - af == AF_INET6, data); - } - default: - return -EINVAL; - } -} /** * sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed @@ -220,7 +187,8 @@ void sock_probe_mem(struct ctx *c) int v = INT_MAX / 2, s; socklen_t sl; - if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + s = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + if (s < 0) { c->low_wmem = c->low_rmem = 1; return; } @@ -250,7 +218,7 @@ void sock_probe_mem(struct ctx *c) int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b) { if (a->tv_nsec < b->tv_nsec) { - return (b->tv_nsec - a->tv_nsec) / 1000 + + return (a->tv_nsec + 1000000000 - b->tv_nsec) / 1000 + (a->tv_sec - b->tv_sec - 1) * 1000000; } @@ -444,25 +412,20 @@ void pidfile_write(int fd, pid_t pid) } /** - * pidfile_open() - Open PID file if needed - * @path: Path for PID file, empty string if no PID file is requested + * output_file_open() - Open file for output, if needed + * @path: Path for output file + * @flags: Flags for open() other than O_CREAT, O_TRUNC, O_CLOEXEC * - * Return: descriptor for PID file, -1 if path is NULL, won't return on failure + * Return: file descriptor on success, -1 on failure with errno set by open() */ -int pidfile_open(const char *path) +int output_file_open(const char *path, int flags) { - int fd; - - if (!*path) - return -1; - - if ((fd = open(path, O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC, - S_IRUSR | S_IWUSR)) < 0) { - perror("PID file open"); - exit(EXIT_FAILURE); - } - - return fd; + /* We use O_CLOEXEC here, but clang-tidy as of LLVM 16 to 19 looks for + * it in the 'mode' argument if we have one + */ + return open(path, O_CREAT | O_TRUNC | O_CLOEXEC | flags, + /* NOLINTNEXTLINE(android-cloexec-open) */ + S_IRUSR | S_IWUSR); } /** @@ -486,16 +449,11 @@ int __daemon(int pidfile_fd, int devnull_fd) exit(EXIT_SUCCESS); } - errno = 0; - - setsid(); - - dup2(devnull_fd, STDIN_FILENO); - dup2(devnull_fd, STDOUT_FILENO); - dup2(devnull_fd, STDERR_FILENO); - close(devnull_fd); - - if (errno) + if (setsid() < 0 || + dup2(devnull_fd, STDIN_FILENO) < 0 || + dup2(devnull_fd, STDOUT_FILENO) < 0 || + dup2(devnull_fd, STDERR_FILENO) < 0 || + close(devnull_fd)) exit(EXIT_FAILURE); return 0; @@ -583,6 +541,36 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, #endif } +/* write_all_buf() - write all of a buffer to an fd + * @fd: File descriptor + * @buf: Pointer to base of buffer + * @len: Length of buffer + * + * Return: 0 on success, -1 on error (with errno set) + * + * #syscalls write + */ +int write_all_buf(int fd, const void *buf, size_t len) +{ + const char *p = buf; + size_t left = len; + + while (left) { + ssize_t rc; + + do + rc = write(fd, p, left); + while ((rc < 0) && errno == EINTR); + + if (rc < 0) + return -1; + + p += rc; + left -= rc; + } + return 0; +} + /* write_remainder() - write the tail of an IO vector to an fd * @fd: File descriptor * @iov: IO vector @@ -591,28 +579,30 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, * * Return: 0 on success, -1 on error (with errno set) * - * #syscalls write writev + * #syscalls writev */ int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip) { - size_t offset, i; + size_t i = 0, offset; - while ((i = iov_skip_bytes(iov, iovcnt, skip, &offset)) < iovcnt) { + while ((i += iov_skip_bytes(iov + i, iovcnt - i, skip, &offset)) < iovcnt) { ssize_t rc; if (offset) { - rc = write(fd, (char *)iov[i].iov_base + offset, - iov[i].iov_len - offset); - } else { - rc = writev(fd, &iov[i], iovcnt - i); + /* Write the remainder of the partially written buffer */ + if (write_all_buf(fd, (char *)iov[i].iov_base + offset, + iov[i].iov_len - offset) < 0) + return -1; + i++; } + /* Write as much of the remaining whole buffers as we can */ + rc = writev(fd, &iov[i], iovcnt - i); if (rc < 0) return -1; - skip += rc; + skip = rc; } - return 0; } @@ -676,6 +666,25 @@ const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size) return dst; } +/** eth_ntop() - Convert an Ethernet MAC address to text format + * @mac: MAC address + * @dst: Output buffer, minimum ETH_ADDRSTRLEN bytes + * @size: Size of buffer at @dst + * + * Return: On success, a non-null pointer to @dst, NULL on failure + */ +const char *eth_ntop(const unsigned char *mac, char *dst, size_t size) +{ + int len; + + len = snprintf(dst, size, "%02x:%02x:%02x:%02x:%02x:%02x", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + if (len < 0 || (size_t)len >= size) + return NULL; + + return dst; +} + /** str_ee_origin() - Convert socket extended error origin to a string * @ee: Socket extended error structure * @@ -710,7 +719,7 @@ void close_open_files(int argc, char **argv) int name, rc; do { - name = getopt_long(argc, argv, "+:F", optfd, NULL); + name = getopt_long(argc, argv, "-:F:", optfd, NULL); if (name == 'F') { errno = 0; @@ -732,6 +741,99 @@ void close_open_files(int argc, char **argv) rc = close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE); } - if (rc) - die_perror("Failed to close files leaked by parent"); + if (rc) { + if (errno == ENOSYS || errno == EINVAL) { + /* This probably means close_range() or the + * CLOSE_RANGE_UNSHARE flag is not supported by the + * kernel. Not much we can do here except carry on and + * hope for the best. + */ + warn( +"Can't use close_range() to ensure no files leaked by parent"); + } else { + die_perror("Failed to close files leaked by parent"); + } + } + +} + +/** + * snprintf_check() - snprintf() wrapper, checking for truncation and errors + * @str: Output buffer + * @size: Maximum size to write to @str + * @format: Message + * + * Return: false on success, true on truncation or error, sets errno on failure + */ +bool snprintf_check(char *str, size_t size, const char *format, ...) +{ + va_list ap; + int rc; + + va_start(ap, format); + rc = vsnprintf(str, size, format, ap); + va_end(ap); + + if (rc < 0) { + errno = EIO; + return true; + } + + if ((size_t)rc >= size) { + errno = ENOBUFS; + return true; + } + + return false; +} + +#define DEV_RANDOM "/dev/random" + +/** + * raw_random() - Get high quality random bytes + * @buf: Buffer to fill with random bytes + * @buflen: Number of bytes of random data to put in @buf + * + * Assumes that the random data is essential, and will die() if unable to obtain + * it. + */ +void raw_random(void *buf, size_t buflen) +{ + size_t random_read = 0; +#ifndef HAS_GETRANDOM + int fd = open(DEV_RANDOM, O_RDONLY); + + if (fd < 0) + die_perror("Couldn't open %s", DEV_RANDOM); +#endif + + while (random_read < buflen) { + ssize_t ret; + +#ifdef HAS_GETRANDOM + ret = getrandom((char *)buf + random_read, + buflen - random_read, GRND_RANDOM); +#else + ret = read(dev_random, (char *)buf + random_read, + buflen - random_read); +#endif + + if (ret == -1 && errno == EINTR) + continue; + + if (ret < 0) + die_perror("Error on random data source"); + + if (ret == 0) + break; + + random_read += ret; + } + +#ifndef HAS_GETRANDOM + close(dev_random); +#endif + + if (random_read < buflen) + die("Unexpected EOF on random data source"); } |