about | git | code | bugs | lists | chat
path: root/util.c
diff options
context:
space:
mode:
Diffstat (limited to 'util.c')
-rw-r--r-- util.c 736
1 files changed, 614 insertions, 122 deletions
diff --git a/util.c b/util.c
index 849fa7f..7b245cc 100644
--- a/util.c
+++ b/util.c
@@ -25,75 +25,72 @@
#include <time.h>
#include <errno.h>
#include <stdbool.h>
+#include <linux/errqueue.h>
+#include <getopt.h>
+#include "linux_dep.h"
#include "util.h"
#include "iov.h"
#include "passt.h"
#include "packet.h"
#include "log.h"
+#ifdef HAS_GETRANDOM
+#include <sys/random.h>
+#endif
/**
- * sock_l4() - Create and bind socket for given L4, add to epoll list
+ * sock_l4_sa() - Create and bind socket to socket address, add to epoll list
* @c: Execution context
- * @af: Address family, AF_INET or AF_INET6
- * @proto: Protocol number
- * @bind_addr: Address for binding, NULL for any
+ * @type: epoll type
+ * @sa: Socket address to bind to
+ * @sl: Length of @sa
* @ifname: Interface for binding, NULL for any
- * @port: Port, host order
+ * @v6only: Set IPV6_V6ONLY socket option
* @data: epoll reference portion for protocol handlers
*
* Return: newly created socket, negative error code on failure
*/
-int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
- const void *bind_addr, const char *ifname, uint16_t port,
- uint32_t data)
+int sock_l4_sa(const struct ctx *c, enum epoll_type type,
+ const void *sa, socklen_t sl,
+ const char *ifname, bool v6only, uint32_t data)
{
- union epoll_ref ref = { .data = data };
- struct sockaddr_in addr4 = {
- .sin_family = AF_INET,
- .sin_port = htons(port),
- { 0 }, { 0 },
- };
- struct sockaddr_in6 addr6 = {
- .sin6_family = AF_INET6,
- .sin6_port = htons(port),
- 0, IN6ADDR_ANY_INIT, 0,
- };
- const struct sockaddr *sa;
- bool dual_stack = false;
- int fd, sl, y = 1, ret;
+ sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
+ union epoll_ref ref = { .type = type, .data = data };
+ bool freebind = false;
struct epoll_event ev;
-
- switch (proto) {
- case IPPROTO_TCP:
- ref.type = EPOLL_TYPE_TCP_LISTEN;
+ int fd, y = 1, ret;
+ uint8_t proto;
+ int socktype;
+
+ switch (type) {
+ case EPOLL_TYPE_TCP_LISTEN:
+ proto = IPPROTO_TCP;
+ socktype = SOCK_STREAM | SOCK_NONBLOCK;
+ freebind = c->freebind;
break;
- case IPPROTO_UDP:
- ref.type = EPOLL_TYPE_UDP;
+ case EPOLL_TYPE_UDP_LISTEN:
+ freebind = c->freebind;
+ /* fallthrough */
+ case EPOLL_TYPE_UDP:
+ proto = IPPROTO_UDP;
+ socktype = SOCK_DGRAM | SOCK_NONBLOCK;
break;
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- ref.type = EPOLL_TYPE_PING;
+ case EPOLL_TYPE_PING:
+ if (af == AF_INET)
+ proto = IPPROTO_ICMP;
+ else
+ proto = IPPROTO_ICMPV6;
+ socktype = SOCK_DGRAM | SOCK_NONBLOCK;
break;
default:
- return -EPFNOSUPPORT; /* Not implemented. */
+ ASSERT(0);
}
- if (af == AF_UNSPEC) {
- if (!DUAL_STACK_SOCKETS || bind_addr)
- return -EINVAL;
- dual_stack = true;
- af = AF_INET6;
- }
-
- if (proto == IPPROTO_TCP)
- fd = socket(af, SOCK_STREAM | SOCK_NONBLOCK, proto);
- else
- fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto);
+ fd = socket(af, socktype, proto);
ret = -errno;
if (fd < 0) {
- warn("L4 socket: %s", strerror(-ret));
+ warn("L4 socket: %s", strerror_(-ret));
return ret;
}
@@ -104,34 +101,25 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
ref.fd = fd;
- if (af == AF_INET) {
- if (bind_addr)
- addr4.sin_addr = *(struct in_addr *)bind_addr;
+ if (v6only)
+ if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &y, sizeof(y)))
+ debug("Failed to set IPV6_V6ONLY on socket %i", fd);
- sa = (const struct sockaddr *)&addr4;
- sl = sizeof(addr4);
- } else {
- if (bind_addr) {
- addr6.sin6_addr = *(struct in6_addr *)bind_addr;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)))
+ debug("Failed to set SO_REUSEADDR on socket %i", fd);
- if (!memcmp(bind_addr, &c->ip6.addr_ll,
- sizeof(c->ip6.addr_ll)))
- addr6.sin6_scope_id = c->ifi6;
- }
+ if (proto == IPPROTO_UDP) {
+ int pktinfo = af == AF_INET ? IP_PKTINFO : IPV6_RECVPKTINFO;
+ int recverr = af == AF_INET ? IP_RECVERR : IPV6_RECVERR;
+ int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
- sa = (const struct sockaddr *)&addr6;
- sl = sizeof(addr6);
+ if (setsockopt(fd, level, recverr, &y, sizeof(y)))
+ die_perror("Failed to set RECVERR on socket %i", fd);
- if (!dual_stack)
- if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
- &y, sizeof(y)))
- debug("Failed to set IPV6_V6ONLY on socket %i",
- fd);
+ if (setsockopt(fd, level, pktinfo, &y, sizeof(y)))
+ die_perror("Failed to set PKTINFO on socket %i", fd);
}
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)))
- debug("Failed to set SO_REUSEADDR on socket %i", fd);
-
if (ifname && *ifname) {
/* Supported since kernel version 5.7, commit c427bfec18f2
* ("net: core: enable SO_BINDTODEVICE for non-root users"). If
@@ -140,30 +128,45 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
*/
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
ifname, strlen(ifname))) {
+ char str[SOCKADDR_STRLEN];
+
ret = -errno;
- warn("Can't bind %s socket for port %u to %s, closing",
- EPOLL_TYPE_STR(proto), port, ifname);
+ warn("Can't bind %s socket for %s to %s, closing",
+ EPOLL_TYPE_STR(proto),
+ sockaddr_ntop(sa, str, sizeof(str)), ifname);
close(fd);
return ret;
}
}
+ if (freebind) {
+ int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
+ int opt = af == AF_INET ? IP_FREEBIND : IPV6_FREEBIND;
+
+ if (setsockopt(fd, level, opt, &y, sizeof(y))) {
+ err_perror("Failed to set %s on socket %i",
+ af == AF_INET ? "IP_FREEBIND"
+ : "IPV6_FREEBIND",
+ fd);
+ }
+ }
+
if (bind(fd, sa, sl) < 0) {
/* We'll fail to bind to low ports if we don't have enough
* capabilities, and we'll fail to bind on already bound ports,
* this is fine. This might also fail for ICMP because of a
* broken SELinux policy, see icmp_tap_handler().
*/
- if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) {
+ if (type != EPOLL_TYPE_PING) {
ret = -errno;
close(fd);
return ret;
}
}
- if (proto == IPPROTO_TCP && listen(fd, 128) < 0) {
+ if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) {
ret = -errno;
- warn("TCP socket listen: %s", strerror(-ret));
+ warn("TCP socket listen: %s", strerror_(-ret));
close(fd);
return ret;
}
@@ -172,7 +175,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
ev.data.u64 = ref.u64;
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
ret = -errno;
- warn("L4 epoll_ctl: %s", strerror(-ret));
+ warn("L4 epoll_ctl: %s", strerror_(-ret));
return ret;
}
@@ -180,6 +183,68 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
}
/**
+ * sock_unix() - Create and bind AF_UNIX socket
+ * @sock_path: Socket path. If empty, set on return (UNIX_SOCK_PATH as prefix)
+ *
+ * With an empty @sock_path, candidate paths are generated from UNIX_SOCK_PATH
+ * using a counter running from 1 up to, but not including, UNIX_SOCK_MAX, and
+ * the first one that can be bound is used. With a non-empty @sock_path, only
+ * that path is tried.
+ *
+ * Before binding, each candidate is probed with connect() on a temporary
+ * socket: if the connection succeeds, or fails with anything other than ENOENT,
+ * ECONNREFUSED or EACCES, the path is considered to be in use. Otherwise the
+ * path is unlinked and bound.
+ *
+ * UNIX_PATH_MAX bytes are copied from and to @sock_path, so the buffer it
+ * points to needs to be at least that large.
+ *
+ * Return: socket descriptor on success, won't return on failure
+ */
+int sock_unix(char *sock_path)
+{
+int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+struct sockaddr_un addr = {
+.sun_family = AF_UNIX,
+};
+int i;
+
+if (fd < 0)
+die_perror("Failed to open UNIX domain socket");
+
+for (i = 1; i < UNIX_SOCK_MAX; i++) {
+char *path = addr.sun_path;
+int ex, ret;
+
+if (*sock_path)
+memcpy(path, sock_path, UNIX_PATH_MAX);
+else if (snprintf_check(path, UNIX_PATH_MAX - 1,
+UNIX_SOCK_PATH, i))
+die_perror("Can't build UNIX domain socket path");
+
+ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC,
+0); /* Temporary socket, only used to probe the path */
+if (ex < 0)
+die_perror("Failed to check for UNIX domain conflicts");
+
+ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
+if (!ret || (errno != ENOENT && errno != ECONNREFUSED &&
+errno != EACCES)) {
+if (*sock_path)
+die("Socket path %s already in use", path);
+
+close(ex);
+continue; /* Generated path is taken, try the next one */
+}
+close(ex);
+
+unlink(path); /* Remove a leftover entry, if any, before binding */
+ret = bind(fd, (const struct sockaddr *)&addr, sizeof(addr));
+if (*sock_path && ret)
+die_perror("Failed to bind UNIX domain socket");
+
+if (!ret)
+break;
+}
+
+if (i == UNIX_SOCK_MAX) /* Every generated candidate failed */
+die_perror("Failed to bind UNIX domain socket");
+
+info("UNIX domain socket bound at %s", addr.sun_path);
+if (!*sock_path)
+memcpy(sock_path, addr.sun_path, UNIX_PATH_MAX);
+
+return fd;
+}
+
+/**
* sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
* @c: Execution context
*/
@@ -188,7 +253,8 @@ void sock_probe_mem(struct ctx *c)
int v = INT_MAX / 2, s;
socklen_t sl;
- if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ s = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
+ if (s < 0) {
c->low_wmem = c->low_rmem = 1;
return;
}
@@ -208,23 +274,34 @@ void sock_probe_mem(struct ctx *c)
close(s);
}
-
/**
- * timespec_diff_ms() - Report difference in milliseconds between two timestamps
+ * timespec_diff_us() - Report difference in microseconds between two timestamps
* @a: Minuend timestamp
* @b: Subtrahend timestamp
*
- * Return: difference in milliseconds
+ * Return: difference in microseconds (wraps after 2^63 / 10^6s ~= 292k years)
*/
-int timespec_diff_ms(const struct timespec *a, const struct timespec *b)
+int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b)
{
if (a->tv_nsec < b->tv_nsec) {
- return (b->tv_nsec - a->tv_nsec) / 1000000 +
- (a->tv_sec - b->tv_sec - 1) * 1000;
+ return (a->tv_nsec + 1000000000 - b->tv_nsec) / 1000 +
+ (a->tv_sec - b->tv_sec - 1) * 1000000;
}
- return (a->tv_nsec - b->tv_nsec) / 1000000 +
- (a->tv_sec - b->tv_sec) * 1000;
+ return (a->tv_nsec - b->tv_nsec) / 1000 +
+ (a->tv_sec - b->tv_sec) * 1000000;
+}
+
+/**
+ * timespec_diff_ms() - Report difference in milliseconds between two timestamps
+ * @a: Minuend timestamp
+ * @b: Subtrahend timestamp
+ *
+ * Computed as the result of timespec_diff_us() divided by 1000 using integer
+ * division, so any sub-millisecond remainder is discarded.
+ *
+ * Return: difference in milliseconds
+ */
+long timespec_diff_ms(const struct timespec *a, const struct timespec *b)
+{
+return timespec_diff_us(a, b) / 1000;
}
/**
@@ -232,7 +309,7 @@ int timespec_diff_ms(const struct timespec *a, const struct timespec *b)
* @map: Pointer to bitmap
* @bit: Bit number to set
*/
-void bitmap_set(uint8_t *map, int bit)
+void bitmap_set(uint8_t *map, unsigned bit)
{
unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit);
@@ -244,7 +321,7 @@ void bitmap_set(uint8_t *map, int bit)
* @map: Pointer to bitmap
* @bit: Bit number to clear
*/
-void bitmap_clear(uint8_t *map, int bit)
+void bitmap_clear(uint8_t *map, unsigned bit)
{
unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit);
@@ -256,9 +333,9 @@ void bitmap_clear(uint8_t *map, int bit)
* @map: Pointer to bitmap
* @bit: Bit number to check
*
- * Return: one if given bit is set, zero if it's not
+ * Return: true if given bit is set, false if it's not
*/
-int bitmap_isset(const uint8_t *map, int bit)
+bool bitmap_isset(const uint8_t *map, unsigned bit)
{
const unsigned long *word
= (const unsigned long *)map + BITMAP_WORD(bit);
@@ -287,7 +364,7 @@ void bitmap_or(uint8_t *dst, size_t size, const uint8_t *a, const uint8_t *b)
dst[i] = a[i] | b[i];
}
-/*
+/**
* ns_enter() - Enter configured user (unless already joined) and network ns
* @c: Execution context
*
@@ -298,7 +375,7 @@ void bitmap_or(uint8_t *dst, size_t size, const uint8_t *a, const uint8_t *b)
void ns_enter(const struct ctx *c)
{
if (setns(c->pasta_netns_fd, CLONE_NEWNET))
- die("setns() failed entering netns: %s", strerror(errno));
+ die_perror("setns() failed entering netns");
}
/**
@@ -313,10 +390,8 @@ bool ns_is_init(void)
bool ret = true;
int fd;
- if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0) {
- die("Can't determine if we're in init namespace: %s",
- strerror(errno));
- }
+ if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
+ die_perror("Can't determine if we're in init namespace");
if (read(fd, buf, sizeof(root_uid_map)) != sizeof(root_uid_map) - 1 ||
strncmp(buf, root_uid_map, sizeof(root_uid_map)))
@@ -380,11 +455,11 @@ int open_in_ns(const struct ctx *c, const char *path, int flags)
}
/**
- * pid_file() - Write PID to file, if requested to do so, and close it
+ * pidfile_write() - Write PID to file, if requested to do so, and close it
* @fd: Open PID file descriptor, closed on exit, -1 to skip writing it
* @pid: PID value to write
*/
-void write_pidfile(int fd, pid_t pid)
+void pidfile_write(int fd, pid_t pid)
{
char pid_buf[12];
int n;
@@ -396,18 +471,36 @@ void write_pidfile(int fd, pid_t pid)
if (write(fd, pid_buf, n) < 0) {
perror("PID file write");
- exit(EXIT_FAILURE);
+ _exit(EXIT_FAILURE);
}
close(fd);
}
/**
+ * output_file_open() - Open file for output, if needed
+ * @path: Path for output file
+ * @flags: Flags for open() other than O_CREAT, O_TRUNC, O_CLOEXEC
+ *
+ * O_CREAT, O_TRUNC and O_CLOEXEC are always added to @flags: the file is
+ * created if missing, with requested mode S_IRUSR | S_IWUSR, and truncated
+ * otherwise.
+ *
+ * Return: file descriptor on success, -1 on failure with errno set by open()
+ */
+int output_file_open(const char *path, int flags)
+{
+/* We use O_CLOEXEC here, but clang-tidy as of LLVM 16 to 19 looks for
+ * it in the 'mode' argument if we have one
+ */
+return open(path, O_CREAT | O_TRUNC | O_CLOEXEC | flags,
+/* NOLINTNEXTLINE(android-cloexec-open) */
+S_IRUSR | S_IWUSR);
+}
+
+/**
* __daemon() - daemon()-like function writing PID file before parent exits
* @pidfile_fd: Open PID file descriptor
* @devnull_fd: Open file descriptor for /dev/null
*
- * Return: child PID on success, won't return on failure
+ * Return: 0 in the child process on success. The parent process exits.
+ * Does not return in either process on failure (calls _exit).
*/
int __daemon(int pidfile_fd, int devnull_fd)
{
@@ -415,25 +508,20 @@ int __daemon(int pidfile_fd, int devnull_fd)
if (pid == -1) {
perror("fork");
- exit(EXIT_FAILURE);
+ _exit(EXIT_FAILURE);
}
if (pid) {
- write_pidfile(pidfile_fd, pid);
- exit(EXIT_SUCCESS);
+ pidfile_write(pidfile_fd, pid);
+ _exit(EXIT_SUCCESS);
}
- errno = 0;
-
- setsid();
-
- dup2(devnull_fd, STDIN_FILENO);
- dup2(devnull_fd, STDOUT_FILENO);
- dup2(devnull_fd, STDERR_FILENO);
- close(devnull_fd);
-
- if (errno)
- exit(EXIT_FAILURE);
+ if (setsid() < 0 ||
+ dup2(devnull_fd, STDIN_FILENO) < 0 ||
+ dup2(devnull_fd, STDOUT_FILENO) < 0 ||
+ dup2(devnull_fd, STDERR_FILENO) < 0 ||
+ close(devnull_fd))
+ _exit(EXIT_FAILURE);
return 0;
}
@@ -470,7 +558,7 @@ int write_file(const char *path, const char *buf)
size_t len = strlen(buf);
if (fd < 0) {
- warn("Could not open %s: %s", path, strerror(errno));
+ warn_perror("Could not open %s", path);
return -1;
}
@@ -478,7 +566,7 @@ int write_file(const char *path, const char *buf)
ssize_t rc = write(fd, buf, len);
if (rc <= 0) {
- warn("Couldn't write to %s: %s", path, strerror(errno));
+ warn_perror("Couldn't write to %s", path);
break;
}
@@ -520,7 +608,39 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
#endif
}
-/* write_remainder() - write the tail of an IO vector to an fd
+/**
+ * write_all_buf() - write all of a buffer to an fd
+ * @fd: File descriptor
+ * @buf: Pointer to base of buffer
+ * @len: Length of buffer
+ *
+ * Keeps calling write() until @len bytes have been written: short writes
+ * advance through the buffer, and calls interrupted by a signal (EINTR) are
+ * retried. Any other write() error stops the loop, with no indication of how
+ * much was written before it.
+ *
+ * Return: 0 on success, -1 on error (with errno set)
+ *
+ * #syscalls write
+ */
+int write_all_buf(int fd, const void *buf, size_t len)
+{
+const char *p = buf;
+size_t left = len;
+
+while (left) {
+ssize_t rc;
+
+do
+rc = write(fd, p, left);
+while ((rc < 0) && errno == EINTR);
+
+if (rc < 0)
+return -1;
+
+p += rc;
+left -= rc;
+}
+return 0;
+}
+
+/**
+ * write_remainder() - write the tail of an IO vector to an fd
* @fd: File descriptor
* @iov: IO vector
* @iovcnt: Number of entries in @iov
@@ -528,28 +648,400 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
*
* Return: 0 on success, -1 on error (with errno set)
*
- * #syscalls write writev
+ * #syscalls writev
*/
-int write_remainder(int fd, const struct iovec *iov, int iovcnt, size_t skip)
+int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip)
{
- int i;
- size_t offset;
+ size_t i = 0, offset;
- while ((i = iov_skip_bytes(iov, iovcnt, skip, &offset)) < iovcnt) {
+ while ((i += iov_skip_bytes(iov + i, iovcnt - i, skip, &offset)) < iovcnt) {
ssize_t rc;
if (offset) {
- rc = write(fd, (char *)iov[i].iov_base + offset,
- iov[i].iov_len - offset);
- } else {
- rc = writev(fd, &iov[i], iovcnt - i);
+ /* Write the remainder of the partially written buffer */
+ if (write_all_buf(fd, (char *)iov[i].iov_base + offset,
+ iov[i].iov_len - offset) < 0)
+ return -1;
+ i++;
}
+ /* Write as much of the remaining whole buffers as we can */
+ rc = writev(fd, &iov[i], iovcnt - i);
if (rc < 0)
return -1;
- skip += rc;
+ skip = rc;
}
+ return 0;
+}
+/**
+ * read_all_buf() - Fill a whole buffer from a file descriptor
+ * @fd: File descriptor
+ * @buf: Pointer to base of buffer
+ * @len: Length of buffer
+ *
+ * Keeps calling read() until @len bytes have been read: short reads advance
+ * through the buffer, and calls interrupted by a signal (EINTR) are retried.
+ * If read() reports end of file before the buffer is full, this fails with
+ * errno set to ENODATA.
+ *
+ * Return: 0 on success, -1 on error (with errno set)
+ *
+ * #syscalls read
+ */
+int read_all_buf(int fd, void *buf, size_t len)
+{
+size_t left = len;
+char *p = buf;
+
+while (left) {
+ssize_t rc;
+
+ASSERT(left <= len); /* Would trip if 'left' ever wrapped around */
+
+do
+rc = read(fd, p, left);
+while ((rc < 0) && errno == EINTR);
+
+if (rc < 0)
+return -1;
+
+if (rc == 0) { /* End of file with bytes still missing */
+errno = ENODATA;
+return -1;
+}
+
+p += rc;
+left -= rc;
+}
return 0;
}
+
+/**
+ * read_remainder() - Read the tail of an IO vector from a file descriptor
+ * @fd: File descriptor
+ * @iov: IO vector
+ * @cnt: Number of entries in @iov
+ * @skip: Number of bytes of the vector to skip reading
+ *
+ * Each pass looks up where to resume with iov_skip_bytes() (defined elsewhere;
+ * presumably it returns the index of the entry containing the byte at @skip and
+ * stores the offset into that entry -- to be confirmed against iov.c). A
+ * partially filled entry is completed with read_all_buf(), then a single
+ * readv() is issued for the entries after it. If readv() reports end of file,
+ * this fails with errno set to ENODATA.
+ *
+ * Return: 0 on success, -1 on error (with errno set)
+ *
+ * Note: mode-specific seccomp profiles need to enable readv() to use this.
+ */
+/* cppcheck-suppress unusedFunction */
+int read_remainder(int fd, const struct iovec *iov, size_t cnt, size_t skip)
+{
+size_t i = 0, offset;
+
+while ((i += iov_skip_bytes(iov + i, cnt - i, skip, &offset)) < cnt) {
+ssize_t rc;
+
+if (offset) {
+ASSERT(offset < iov[i].iov_len);
+/* Read the remainder of the partially read buffer */
+if (read_all_buf(fd, (char *)iov[i].iov_base + offset,
+iov[i].iov_len - offset) < 0)
+return -1;
+i++;
+}
+
+if (cnt == i) /* The partial entry was the last one: all done */
+break;
+
+/* Fill as many of the remaining buffers as we can */
+rc = readv(fd, &iov[i], cnt - i);
+if (rc < 0)
+return -1;
+
+if (rc == 0) {
+errno = ENODATA;
+return -1;
+}
+
+skip = rc; /* Counted from iov[i], as the next lookup starts there */
+}
+return 0;
+}
+
+/** sockaddr_ntop() - Convert a socket address to text format
+ * @sa: Socket address
+ * @dst: output buffer, minimum SOCKADDR_STRLEN bytes
+ * @size: size of buffer at @dst
+ *
+ * Output depends on the address family found in @sa:
+ * - AF_UNSPEC: the literal string "<unspecified>"
+ * - AF_INET: address as formatted by inet_ntop(), then ":" and the port
+ * - AF_INET6: address as formatted by inet_ntop() between "[" and "]", then
+ *   ":" and the port
+ * Ports are converted from network order with ntohs(). Any other family fails
+ * with errno set to EAFNOSUPPORT.
+ *
+ * The local IPRINTF() and INTOP() helpers append to @dst at the current offset
+ * and return NULL from this function if the text doesn't fit in @size, or if
+ * inet_ntop() fails.
+ *
+ * Return: On success, a non-null pointer to @dst, NULL on failure
+ */
+const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size)
+{
+sa_family_t family = ((const struct sockaddr *)sa)->sa_family;
+socklen_t off = 0;
+
+#define IPRINTF(...) \
+do { \
+off += snprintf(dst + off, size - off, __VA_ARGS__); \
+if (off >= size) \
+return NULL; \
+} while (0)
+
+#define INTOP(af, addr) \
+do { \
+if (!inet_ntop((af), (addr), dst + off, size - off)) \
+return NULL; \
+off += strlen(dst + off); \
+} while (0)
+
+switch (family) {
+case AF_UNSPEC:
+IPRINTF("<unspecified>");
+break;
+
+case AF_INET: {
+const struct sockaddr_in *sa4 = sa;
+
+INTOP(AF_INET, &sa4->sin_addr);
+IPRINTF(":%hu", ntohs(sa4->sin_port));
+break;
+}
+
+case AF_INET6: {
+const struct sockaddr_in6 *sa6 = sa;
+
+IPRINTF("[");
+INTOP(AF_INET6, &sa6->sin6_addr);
+IPRINTF("]:%hu", ntohs(sa6->sin6_port));
+break;
+}
+
+/* FIXME: Implement AF_UNIX */
+default:
+errno = EAFNOSUPPORT;
+return NULL;
+}
+
+#undef IPRINTF
+#undef INTOP
+
+return dst;
+}
+
+/** eth_ntop() - Convert an Ethernet MAC address to text format
+ * @mac: MAC address
+ * @dst: Output buffer, minimum ETH_ADDRSTRLEN bytes
+ * @size: Size of buffer at @dst
+ *
+ * The first six bytes at @mac are printed as two-digit lowercase hexadecimal
+ * values separated by colons. Fails if snprintf() reports an error or if the
+ * result, including the terminator, doesn't fit in @size.
+ *
+ * Return: On success, a non-null pointer to @dst, NULL on failure
+ */
+const char *eth_ntop(const unsigned char *mac, char *dst, size_t size)
+{
+int len;
+
+len = snprintf(dst, size, "%02x:%02x:%02x:%02x:%02x:%02x",
+mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+if (len < 0 || (size_t)len >= size)
+return NULL;
+
+return dst;
+}
+
+/** str_ee_origin() - Convert socket extended error origin to a string
+ * @ee: Socket extended error structure
+ *
+ * Only the ee_origin field of @ee is used. Descriptions are provided for
+ * SO_EE_ORIGIN_NONE, SO_EE_ORIGIN_LOCAL, SO_EE_ORIGIN_ICMP and
+ * SO_EE_ORIGIN_ICMP6; origins beyond the end of the table yield "<invalid>".
+ *
+ * Return: Static string describing error origin
+ */
+const char *str_ee_origin(const struct sock_extended_err *ee)
+{
+const char *const desc[] = {
+[SO_EE_ORIGIN_NONE] = "<no origin>",
+[SO_EE_ORIGIN_LOCAL] = "Local",
+[SO_EE_ORIGIN_ICMP] = "ICMP",
+[SO_EE_ORIGIN_ICMP6] = "ICMPv6",
+};
+
+if (ee->ee_origin < ARRAY_SIZE(desc))
+return desc[ee->ee_origin];
+
+return "<invalid>";
+}
+
+/**
+ * close_open_files() - Close leaked files, but not --fd, stdin, stdout, stderr
+ * @argc:	Argument count
+ * @argv:	Command line options, as we need to skip any file given via --fd
+ *
+ * Only --fd (-F) is looked at here. If it's given more than once, the last
+ * value is the one kept open. Values 1 and 2, negative values, and values
+ * above INT_MAX are rejected with die().
+ *
+ * If close_range() fails with ENOSYS or EINVAL, a warning is printed and
+ * execution continues; any other failure is fatal.
+ */
+void close_open_files(int argc, char **argv)
+{
+	const struct option optfd[] = { { "fd", required_argument, NULL, 'F' },
+					{ 0 },
+				      };
+	long fd = -1;
+	int name, rc;
+
+	do {
+		name = getopt_long(argc, argv, "-:F:", optfd, NULL);
+
+		if (name == 'F') {
+			errno = 0;
+			fd = strtol(optarg, NULL, 0);
+
+			if (errno ||
+			    (fd != STDIN_FILENO && fd <= STDERR_FILENO) ||
+			    fd > INT_MAX)
+				die("Invalid --fd: %s", optarg);
+		}
+	} while (name != -1);
+
+	if (fd <= STDERR_FILENO) {
+		/* No --fd (-1), or --fd 0: standard streams are never closed,
+		 * so a single range starting right after stderr is enough.
+		 * Sending --fd 0 to the two-range case below would make
+		 * close_range(fd + 1, ...) close stdout and stderr as well.
+		 */
+		rc = close_range(STDERR_FILENO + 1, ~0U, CLOSE_RANGE_UNSHARE);
+	} else if (fd == STDERR_FILENO + 1) { /* Still a single range */
+		rc = close_range(STDERR_FILENO + 2, ~0U, CLOSE_RANGE_UNSHARE);
+	} else {
+		/* Two ranges: below and above the descriptor to keep */
+		rc = close_range(STDERR_FILENO + 1, fd - 1,
+				 CLOSE_RANGE_UNSHARE);
+		if (!rc)
+			rc = close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE);
+	}
+
+	if (rc) {
+		if (errno == ENOSYS || errno == EINVAL) {
+			/* This probably means close_range() or the
+			 * CLOSE_RANGE_UNSHARE flag is not supported by the
+			 * kernel. Not much we can do here except carry on and
+			 * hope for the best.
+			 */
+			warn(
+"Can't use close_range() to ensure no files leaked by parent");
+		} else {
+			die_perror("Failed to close files leaked by parent");
+		}
+	}
+}
+
+/**
+ * snprintf_check() - snprintf() wrapper, checking for truncation and errors
+ * @str: Output buffer
+ * @size: Maximum size to write to @str
+ * @format: Message
+ *
+ * errno is set to EIO if vsnprintf() returns a negative value, and to ENOBUFS
+ * if the formatted text, including its terminator, doesn't fit in @size.
+ *
+ * Return: false on success, true on truncation or error, sets errno on failure
+ */
+bool snprintf_check(char *str, size_t size, const char *format, ...)
+{
+va_list ap;
+int rc;
+
+va_start(ap, format);
+rc = vsnprintf(str, size, format, ap);
+va_end(ap);
+
+if (rc < 0) {
+errno = EIO;
+return true;
+}
+
+if ((size_t)rc >= size) { /* Output was truncated */
+errno = ENOBUFS;
+return true;
+}
+
+return false;
+}
+
+#define DEV_RANDOM "/dev/random"
+
+/**
+ * raw_random() - Get high quality random bytes
+ * @buf:	Buffer to fill with random bytes
+ * @buflen:	Number of bytes of random data to put in @buf
+ *
+ * With HAS_GETRANDOM, bytes come from getrandom() with GRND_RANDOM; otherwise
+ * they are read from DEV_RANDOM. Short reads are accumulated, and calls
+ * interrupted by a signal (EINTR) are retried, until @buflen bytes are
+ * available.
+ *
+ * Assumes that the random data is essential, and will die() if unable to obtain
+ * it.
+ */
+void raw_random(void *buf, size_t buflen)
+{
+	size_t random_read = 0;
+#ifndef HAS_GETRANDOM
+	int fd = open(DEV_RANDOM, O_RDONLY);
+
+	if (fd < 0)
+		die_perror("Couldn't open %s", DEV_RANDOM);
+#endif
+
+	while (random_read < buflen) {
+		ssize_t ret;
+
+#ifdef HAS_GETRANDOM
+		ret = getrandom((char *)buf + random_read,
+				buflen - random_read, GRND_RANDOM);
+#else
+		/* Use the descriptor opened above: it's called 'fd' */
+		ret = read(fd, (char *)buf + random_read,
+			   buflen - random_read);
+#endif
+
+		if (ret == -1 && errno == EINTR)
+			continue;
+
+		if (ret < 0)
+			die_perror("Error on random data source");
+
+		if (ret == 0)	/* End of file: reported after the loop */
+			break;
+
+		random_read += ret;
+	}
+
+#ifndef HAS_GETRANDOM
+	close(fd);
+#endif
+
+	if (random_read < buflen)
+		die("Unexpected EOF on random data source");
+}
+
+/**
+ * epoll_del() - Remove a file descriptor from our passt epoll
+ * @c: Execution context
+ * @fd: File descriptor to remove
+ *
+ * Issues EPOLL_CTL_DEL on c->epollfd. The result of epoll_ctl() is not
+ * checked, so failures go unreported.
+ */
+void epoll_del(const struct ctx *c, int fd)
+{
+epoll_ctl(c->epollfd, EPOLL_CTL_DEL, fd, NULL);
+
+}
+
+/**
+ * encode_domain_name() - Encode domain name according to RFC 1035, section 3.1
+ * @buf: Buffer to fill in with encoded domain name
+ * @domain_name: Input domain name string with terminator
+ *
+ * The first output byte is the length of the first label. After it, the input
+ * is copied byte by byte, with each '.' replaced by the length of the label
+ * that follows it, and a zero byte is written at the end.
+ *
+ * NOTE(review): label lengths are stored as computed by strcspn(), with no
+ * range check visible here -- callers presumably validate label sizes.
+ *
+ * The buffer's 'buf' size has to be >= strlen(domain_name) + 2
+ */
+void encode_domain_name(char *buf, const char *domain_name)
+{
+size_t i;
+char *p;
+
+buf[0] = strcspn(domain_name, "."); /* Length of the first label */
+p = buf + 1;
+for (i = 0; domain_name[i]; i++) {
+if (domain_name[i] == '.')
+p[i] = strcspn(domain_name + i + 1, ".");
+else
+p[i] = domain_name[i];
+}
+p[i] = 0L; /* Terminating zero-length byte */
+}
+
+/**
+ * abort_with_msg() - Print error message and abort
+ * @fmt: Format string
+ * @...: Format parameters
+ *
+ * The message is passed to vlogmsg() at LOG_CRIT priority, then abort() is
+ * called, so this function doesn't return.
+ */
+void abort_with_msg(const char *fmt, ...)
+{
+va_list ap;
+
+va_start(ap, fmt);
+vlogmsg(true, false, LOG_CRIT, fmt, ap);
+va_end(ap);
+
+/* This may actually cause a SIGSYS instead of SIGABRT, due to seccomp,
+ * but that will still get the job done.
+ */
+abort();
+}