about | git | code | bugs | lists | chat
path: root/util.c
diff options
context:
space:
mode:
Diffstat (limited to 'util.c')
-rw-r--r-- util.c 294
1 files changed, 198 insertions, 96 deletions
diff --git a/util.c b/util.c
index 0b41404..11973c4 100644
--- a/util.c
+++ b/util.c
@@ -28,11 +28,15 @@
#include <linux/errqueue.h>
#include <getopt.h>
+#include "linux_dep.h"
#include "util.h"
#include "iov.h"
#include "passt.h"
#include "packet.h"
#include "log.h"
+#ifdef HAS_GETRANDOM
+#include <sys/random.h>
+#endif
/**
* sock_l4_sa() - Create and bind socket to socket address, add to epoll list
@@ -52,6 +56,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
{
sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
union epoll_ref ref = { .type = type, .data = data };
+ bool freebind = false;
struct epoll_event ev;
int fd, y = 1, ret;
uint8_t proto;
@@ -61,8 +66,11 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
case EPOLL_TYPE_TCP_LISTEN:
proto = IPPROTO_TCP;
socktype = SOCK_STREAM | SOCK_NONBLOCK;
+ freebind = c->freebind;
break;
case EPOLL_TYPE_UDP_LISTEN:
+ freebind = c->freebind;
+ /* fallthrough */
case EPOLL_TYPE_UDP_REPLY:
proto = IPPROTO_UDP;
socktype = SOCK_DGRAM | SOCK_NONBLOCK;
@@ -82,7 +90,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
ret = -errno;
if (fd < 0) {
- warn("L4 socket: %s", strerror(-ret));
+ warn("L4 socket: %s", strerror_(-ret));
return ret;
}
@@ -127,6 +135,18 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
}
}
+ if (freebind) {
+ int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
+ int opt = af == AF_INET ? IP_FREEBIND : IPV6_FREEBIND;
+
+ if (setsockopt(fd, level, opt, &y, sizeof(y))) {
+ err_perror("Failed to set %s on socket %i",
+ af == AF_INET ? "IP_FREEBIND"
+ : "IPV6_FREEBIND",
+ fd);
+ }
+ }
+
if (bind(fd, sa, sl) < 0) {
/* We'll fail to bind to low ports if we don't have enough
* capabilities, and we'll fail to bind on already bound ports,
@@ -142,7 +162,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) {
ret = -errno;
- warn("TCP socket listen: %s", strerror(-ret));
+ warn("TCP socket listen: %s", strerror_(-ret));
close(fd);
return ret;
}
@@ -151,65 +171,12 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
ev.data.u64 = ref.u64;
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
ret = -errno;
- warn("L4 epoll_ctl: %s", strerror(-ret));
+ warn("L4 epoll_ctl: %s", strerror_(-ret));
return ret;
}
return fd;
}
-/**
- * sock_l4() - Create and bind socket for given L4, add to epoll list
- * @c: Execution context
- * @af: Address family, AF_INET or AF_INET6
- * @type: epoll type
- * @bind_addr: Address for binding, NULL for any
- * @ifname: Interface for binding, NULL for any
- * @port: Port, host order
- * @data: epoll reference portion for protocol handlers
- *
- * Return: newly created socket, negative error code on failure
- */
-int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
- const void *bind_addr, const char *ifname, uint16_t port,
- uint32_t data)
-{
- switch (af) {
- case AF_INET: {
- struct sockaddr_in addr4 = {
- .sin_family = AF_INET,
- .sin_port = htons(port),
- { 0 }, { 0 },
- };
- if (bind_addr)
- addr4.sin_addr = *(struct in_addr *)bind_addr;
- return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname,
- false, data);
- }
-
- case AF_UNSPEC:
- if (!DUAL_STACK_SOCKETS || bind_addr)
- return -EINVAL;
- /* fallthrough */
- case AF_INET6: {
- struct sockaddr_in6 addr6 = {
- .sin6_family = AF_INET6,
- .sin6_port = htons(port),
- 0, IN6ADDR_ANY_INIT, 0,
- };
- if (bind_addr) {
- addr6.sin6_addr = *(struct in6_addr *)bind_addr;
-
- if (!memcmp(bind_addr, &c->ip6.addr_ll,
- sizeof(c->ip6.addr_ll)))
- addr6.sin6_scope_id = c->ifi6;
- }
- return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname,
- af == AF_INET6, data);
- }
- default:
- return -EINVAL;
- }
-}
/**
* sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
@@ -220,7 +187,8 @@ void sock_probe_mem(struct ctx *c)
int v = INT_MAX / 2, s;
socklen_t sl;
- if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ s = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
+ if (s < 0) {
c->low_wmem = c->low_rmem = 1;
return;
}
@@ -250,7 +218,7 @@ void sock_probe_mem(struct ctx *c)
int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b)
{
if (a->tv_nsec < b->tv_nsec) {
- return (b->tv_nsec - a->tv_nsec) / 1000 +
+ return (a->tv_nsec + 1000000000 - b->tv_nsec) / 1000 +
(a->tv_sec - b->tv_sec - 1) * 1000000;
}
@@ -444,25 +412,20 @@ void pidfile_write(int fd, pid_t pid)
}
/**
- * pidfile_open() - Open PID file if needed
- * @path: Path for PID file, empty string if no PID file is requested
+ * output_file_open() - Open file for output, if needed
+ * @path: Path for output file
+ * @flags: Flags for open() other than O_CREAT, O_TRUNC, O_CLOEXEC
*
- * Return: descriptor for PID file, -1 if path is NULL, won't return on failure
+ * Return: file descriptor on success, -1 on failure with errno set by open()
*/
-int pidfile_open(const char *path)
+int output_file_open(const char *path, int flags)
{
- int fd;
-
- if (!*path)
- return -1;
-
- if ((fd = open(path, O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC,
- S_IRUSR | S_IWUSR)) < 0) {
- perror("PID file open");
- exit(EXIT_FAILURE);
- }
-
- return fd;
+ /* We use O_CLOEXEC here, but clang-tidy as of LLVM 16 to 19 looks for
+ * it in the 'mode' argument if we have one
+ */
+ return open(path, O_CREAT | O_TRUNC | O_CLOEXEC | flags,
+ /* NOLINTNEXTLINE(android-cloexec-open) */
+ S_IRUSR | S_IWUSR);
}
/**
@@ -486,16 +449,11 @@ int __daemon(int pidfile_fd, int devnull_fd)
exit(EXIT_SUCCESS);
}
- errno = 0;
-
- setsid();
-
- dup2(devnull_fd, STDIN_FILENO);
- dup2(devnull_fd, STDOUT_FILENO);
- dup2(devnull_fd, STDERR_FILENO);
- close(devnull_fd);
-
- if (errno)
+ if (setsid() < 0 ||
+ dup2(devnull_fd, STDIN_FILENO) < 0 ||
+ dup2(devnull_fd, STDOUT_FILENO) < 0 ||
+ dup2(devnull_fd, STDERR_FILENO) < 0 ||
+ close(devnull_fd))
exit(EXIT_FAILURE);
return 0;
@@ -583,6 +541,36 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
#endif
}
+/* write_all_buf() - Keep writing until a whole buffer has been written to an fd
+ * @fd:		File descriptor to write to
+ * @buf:	Pointer to the first byte to write
+ * @len:	Number of bytes to write from @buf
+ *
+ * Return: 0 once all @len bytes are written, -1 on error (with errno set)
+ *
+ * #syscalls write
+ */
+int write_all_buf(int fd, const void *buf, size_t len)
+{
+	const char *base = buf;
+	size_t done = 0;
+
+	while (done < len) {
+		ssize_t n = write(fd, base + done, len - done);
+
+		if (n < 0) {
+			/* Interrupted by a signal: try the same write again */
+			if (errno == EINTR)
+				continue;
+
+			return -1;
+		}
+
+		/* Short writes are fine, carry on from where we got to */
+		done += n;
+	}
+
+	return 0;
+}
+
/* write_remainder() - write the tail of an IO vector to an fd
* @fd: File descriptor
* @iov: IO vector
@@ -591,28 +579,30 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
*
* Return: 0 on success, -1 on error (with errno set)
*
- * #syscalls write writev
+ * #syscalls writev
*/
int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip)
{
- size_t offset, i;
+ size_t i = 0, offset;
- while ((i = iov_skip_bytes(iov, iovcnt, skip, &offset)) < iovcnt) {
+ while ((i += iov_skip_bytes(iov + i, iovcnt - i, skip, &offset)) < iovcnt) {
ssize_t rc;
if (offset) {
- rc = write(fd, (char *)iov[i].iov_base + offset,
- iov[i].iov_len - offset);
- } else {
- rc = writev(fd, &iov[i], iovcnt - i);
+ /* Write the remainder of the partially written buffer */
+ if (write_all_buf(fd, (char *)iov[i].iov_base + offset,
+ iov[i].iov_len - offset) < 0)
+ return -1;
+ i++;
}
+ /* Write as much of the remaining whole buffers as we can */
+ rc = writev(fd, &iov[i], iovcnt - i);
if (rc < 0)
return -1;
- skip += rc;
+ skip = rc;
}
-
return 0;
}
@@ -676,6 +666,25 @@ const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size)
return dst;
}
+/** eth_ntop() - Format an Ethernet MAC address as colon-separated hex text
+ * @mac:	MAC address, the first six bytes are read
+ * @dst:	Output buffer, minimum ETH_ADDRSTRLEN bytes
+ * @size:	Size of buffer at @dst
+ *
+ * Return: @dst on success, NULL if formatting failed or the text didn't fit
+ */
+const char *eth_ntop(const unsigned char *mac, char *dst, size_t size)
+{
+	int rc = snprintf(dst, size, "%02x:%02x:%02x:%02x:%02x:%02x",
+			  mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+
+	/* A result of @size or more means the output was truncated */
+	if (rc >= 0 && (size_t)rc < size)
+		return dst;
+
+	return NULL;
+}
+
/** str_ee_origin() - Convert socket extended error origin to a string
* @ee: Socket extended error structure
*
@@ -710,7 +719,7 @@ void close_open_files(int argc, char **argv)
int name, rc;
do {
- name = getopt_long(argc, argv, "+:F", optfd, NULL);
+ name = getopt_long(argc, argv, "-:F:", optfd, NULL);
if (name == 'F') {
errno = 0;
@@ -732,6 +741,99 @@ void close_open_files(int argc, char **argv)
rc = close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE);
}
- if (rc)
- die_perror("Failed to close files leaked by parent");
+ if (rc) {
+ if (errno == ENOSYS || errno == EINVAL) {
+ /* This probably means close_range() or the
+ * CLOSE_RANGE_UNSHARE flag is not supported by the
+ * kernel. Not much we can do here except carry on and
+ * hope for the best.
+ */
+ warn(
+"Can't use close_range() to ensure no files leaked by parent");
+ } else {
+ die_perror("Failed to close files leaked by parent");
+ }
+ }
+
+}
+
+/**
+ * snprintf_check() - Format into a buffer, reporting truncation and errors
+ * @str:	Output buffer
+ * @size:	Maximum number of bytes to write to @str
+ * @format:	printf()-style format string, followed by its arguments
+ *
+ * Return: false on success, true on truncation or error, sets errno on failure
+ */
+bool snprintf_check(char *str, size_t size, const char *format, ...)
+{
+	va_list args;
+	int n;
+
+	va_start(args, format);
+	n = vsnprintf(str, size, format, args);
+	va_end(args);
+
+	if (n >= 0 && (size_t)n < size)
+		return false;
+
+	/* Formatting errors report EIO, output that didn't fit ENOBUFS */
+	errno = n < 0 ? EIO : ENOBUFS;
+	return true;
+}
+
+#define DEV_RANDOM "/dev/random"
+
+/**
+ * raw_random() - Get high quality random bytes
+ * @buf:	Buffer to fill with random bytes
+ * @buflen:	Number of bytes of random data to put in @buf
+ *
+ * Uses getrandom() with GRND_RANDOM if HAS_GETRANDOM is defined, otherwise
+ * reads from DEV_RANDOM.
+ *
+ * Assumes that the random data is essential, and will die() if unable to obtain
+ * it.
+ */
+void raw_random(void *buf, size_t buflen)
+{
+	size_t random_read = 0;
+#ifndef HAS_GETRANDOM
+	/* Same descriptor name as used by the read() and close() calls below */
+	int dev_random = open(DEV_RANDOM, O_RDONLY | O_CLOEXEC);
+
+	if (dev_random < 0)
+		die_perror("Couldn't open %s", DEV_RANDOM);
+#endif
+
+	while (random_read < buflen) {
+		ssize_t ret;
+
+#ifdef HAS_GETRANDOM
+		ret = getrandom((char *)buf + random_read,
+				buflen - random_read, GRND_RANDOM);
+#else
+		ret = read(dev_random, (char *)buf + random_read,
+			   buflen - random_read);
+#endif
+
+		/* Interrupted by a signal: ask again for the same range */
+		if (ret == -1 && errno == EINTR)
+			continue;
+
+		if (ret < 0)
+			die_perror("Error on random data source");
+
+		/* No more data: stop here, the shortfall is reported below */
+		if (ret == 0)
+			break;
+
+		random_read += ret;
+	}
+
+#ifndef HAS_GETRANDOM
+	close(dev_random);
+#endif
+
+	if (random_read < buflen)
+		die("Unexpected EOF on random data source");
+}