diff options
| author | Laurent Vivier <lvivier@redhat.com> | 2025-10-21 23:01:11 +0200 |
|---|---|---|
| committer | Stefano Brivio <sbrivio@redhat.com> | 2025-10-30 15:32:12 +0100 |
| commit | 965ea66068e653934c0016281df86c17e2a65625 (patch) | |
| tree | f644a3198f9d6acb3dbb028874831fd3001404cf | |
| parent | 8bfa47a5cf0576dd18e8716e1c1e142954a0b72d (diff) | |
| download | passt-965ea66068e653934c0016281df86c17e2a65625.tar passt-965ea66068e653934c0016281df86c17e2a65625.tar.gz passt-965ea66068e653934c0016281df86c17e2a65625.tar.bz2 passt-965ea66068e653934c0016281df86c17e2a65625.tar.lz passt-965ea66068e653934c0016281df86c17e2a65625.tar.xz passt-965ea66068e653934c0016281df86c17e2a65625.tar.zst passt-965ea66068e653934c0016281df86c17e2a65625.zip | |
epoll_ctl: Extract epoll operations
Centralize epoll_add() and epoll_del() helper functions into new
epoll_ctl.c/h files.
This also moves the union epoll_ref definition from passt.h to
epoll_ctl.h where it's more logically placed, and moves the related
FD_REF_BITS and FD_REF_MAX macros from passt.h to util.h.
The new epoll_add() helper simplifies adding file descriptors to epoll
by taking an epoll_ref and events, handling error reporting
consistently across all call sites.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
[sbrivio: Include epoll_ctl.h from netlink.c as it's now needed there]
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
| -rw-r--r-- | Makefile | 22 | ||||
| -rw-r--r-- | epoll_ctl.c | 45 | ||||
| -rw-r--r-- | epoll_ctl.h | 51 | ||||
| -rw-r--r-- | icmp.c | 4 | ||||
| -rw-r--r-- | netlink.c | 1 | ||||
| -rw-r--r-- | passt.c | 2 | ||||
| -rw-r--r-- | passt.h | 34 | ||||
| -rw-r--r-- | pasta.c | 7 | ||||
| -rw-r--r-- | repair.c | 18 | ||||
| -rw-r--r-- | tap.c | 13 | ||||
| -rw-r--r-- | tcp.c | 2 | ||||
| -rw-r--r-- | tcp_splice.c | 2 | ||||
| -rw-r--r-- | udp.c | 2 | ||||
| -rw-r--r-- | udp_flow.c | 1 | ||||
| -rw-r--r-- | util.c | 22 | ||||
| -rw-r--r-- | util.h | 4 | ||||
| -rw-r--r-- | vhost_user.c | 8 | ||||
| -rw-r--r-- | vu_common.c | 2 |
18 files changed, 137 insertions, 103 deletions
@@ -37,23 +37,23 @@ FLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE) FLAGS += -DVERSION=\"$(VERSION)\" FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) -PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ - icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \ - ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c \ - repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c \ - udp_vu.c util.c vhost_user.c virtio.c vu_common.c +PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c epoll_ctl.c \ + flow.c fwd.c icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c \ + log.c mld.c ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c \ + pif.c repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c \ + udp_flow.c udp_vu.c util.c vhost_user.c virtio.c vu_common.c QRAP_SRCS = qrap.c PASST_REPAIR_SRCS = passt-repair.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1 -PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \ - flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \ - lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \ - pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h \ - tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h \ - udp_vu.h util.h vhost_user.h virtio.h vu_common.h +PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h epoll_ctl.h \ + flow.h fwd.h flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h \ + isolation.h lineread.h log.h migrate.h ndp.h netlink.h packet.h \ + passt.h pasta.h pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h \ + tcp_conn.h tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h \ + udp_internal.h udp_vu.h util.h vhost_user.h virtio.h vu_common.h HEADERS = $(PASST_HEADERS) seccomp.h C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);} diff --git a/epoll_ctl.c b/epoll_ctl.c new file 
mode 100644 index 0000000..728a2af --- /dev/null +++ b/epoll_ctl.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* epoll_ctl.c - epoll manipulation helpers + * + * Copyright Red Hat + * Author: Laurent Vivier <lvivier@redhat.com> + */ + +#include <errno.h> + +#include "epoll_ctl.h" + +/** + * epoll_add() - Add a file descriptor to an epollfd + * @epollfd: epoll file descriptor to add to + * @events: epoll events + * @ref: epoll reference for the file descriptor (includes fd and metadata) + * + * Return: 0 on success, negative errno on failure + */ +int epoll_add(int epollfd, uint32_t events, union epoll_ref ref) +{ + struct epoll_event ev; + int ret; + + ev.events = events; + ev.data.u64 = ref.u64; + + ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, ref.fd, &ev); + if (ret == -1) { + ret = -errno; + err("Failed to add fd to epoll: %s", strerror_(-ret)); + } + + return ret; +} + +/** + * epoll_del() - Remove a file descriptor from an epollfd + * @epollfd: epoll file descriptor to remove from + * @fd: File descriptor to remove + */ +void epoll_del(int epollfd, int fd) +{ + epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); +} diff --git a/epoll_ctl.h b/epoll_ctl.h new file mode 100644 index 0000000..2d7e712 --- /dev/null +++ b/epoll_ctl.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: Laurent Vivier <lvivier@redhat.com> + */ + +#ifndef EPOLL_CTL_H +#define EPOLL_CTL_H + +#include <sys/epoll.h> + +#include "util.h" +#include "passt.h" +#include "epoll_type.h" +#include "flow.h" +#include "tcp.h" +#include "udp.h" + +/** + * union epoll_ref - Breakdown of reference for epoll fd bookkeeping + * @type: Type of fd (tells us what to do with events) + * @fd: File descriptor number (implies < 2^24 total descriptors) + * @flow: Index of the flow this fd is linked to + * @tcp_listen: TCP-specific reference part for listening sockets + * @udp: UDP-specific reference part + * @data: Data handled by protocol handlers + * 
@nsdir_fd: netns dirfd for fallback timer checking if namespace is gone + * @queue: vhost-user queue index for this fd + * @u64: Opaque reference for epoll_ctl() and epoll_wait() + */ +union epoll_ref { + struct { + enum epoll_type type:8; + int32_t fd:FD_REF_BITS; + union { + uint32_t flow; + flow_sidx_t flowside; + union tcp_listen_epoll_ref tcp_listen; + union udp_listen_epoll_ref udp; + uint32_t data; + int nsdir_fd; + int queue; + }; + }; + uint64_t u64; +}; +static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), + "epoll_ref must have same size as epoll_data"); + +int epoll_add(int epollfd, uint32_t events, union epoll_ref ref); +void epoll_del(int epollfd, int fd); +#endif /* EPOLL_CTL_H */ @@ -15,7 +15,6 @@ #include <errno.h> #include <net/ethernet.h> #include <net/if.h> -#include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> #include <stdio.h> @@ -23,10 +22,8 @@ #include <stdint.h> #include <stddef.h> #include <string.h> -#include <sys/epoll.h> #include <sys/types.h> #include <sys/socket.h> -#include <unistd.h> #include <time.h> #include <linux/icmpv6.h> @@ -41,6 +38,7 @@ #include "inany.h" #include "icmp.h" #include "flow_table.h" +#include "epoll_ctl.h" #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */ #define ICMP_NUM_IDS (1U << 16) @@ -36,6 +36,7 @@ #include "log.h" #include "ip.h" #include "netlink.h" +#include "epoll_ctl.h" /* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */ #define RTNH_NEXT_AND_DEC(rtnh, attrlen) \ @@ -19,7 +19,6 @@ * created in a separate network namespace). 
*/ -#include <sys/epoll.h> #include <fcntl.h> #include <sys/mman.h> #include <sys/resource.h> @@ -54,6 +53,7 @@ #include "migrate.h" #include "repair.h" #include "netlink.h" +#include "epoll_ctl.h" #define NUM_EPOLL_EVENTS 8 @@ -35,40 +35,6 @@ union epoll_ref; #define MAC_OUR_LAA \ ((uint8_t [ETH_ALEN]){0x9a, 0x55, 0x9a, 0x55, 0x9a, 0x55}) -/** - * union epoll_ref - Breakdown of reference for epoll fd bookkeeping - * @type: Type of fd (tells us what to do with events) - * @fd: File descriptor number (implies < 2^24 total descriptors) - * @flow: Index of the flow this fd is linked to - * @tcp_listen: TCP-specific reference part for listening sockets - * @udp: UDP-specific reference part - * @icmp: ICMP-specific reference part - * @data: Data handled by protocol handlers - * @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone - * @queue: vhost-user queue index for this fd - * @u64: Opaque reference for epoll_ctl() and epoll_wait() - */ -union epoll_ref { - struct { - enum epoll_type type:8; -#define FD_REF_BITS 24 -#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS)) - int32_t fd:FD_REF_BITS; - union { - uint32_t flow; - flow_sidx_t flowside; - union tcp_listen_epoll_ref tcp_listen; - union udp_listen_epoll_ref udp; - uint32_t data; - int nsdir_fd; - int queue; - }; - }; - uint64_t u64; -}; -static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), - "epoll_ref must have same size as epoll_data"); - /* Large enough for ~128 maximum size frames */ #define PKT_BUF_BYTES (8UL << 20) @@ -27,7 +27,6 @@ #include <stdint.h> #include <unistd.h> #include <syslog.h> -#include <sys/epoll.h> #include <sys/inotify.h> #include <sys/mount.h> #include <sys/timerfd.h> @@ -49,6 +48,7 @@ #include "isolation.h" #include "netlink.h" #include "log.h" +#include "epoll_ctl.h" #define HOSTNAME_PREFIX "pasta-" @@ -444,7 +444,6 @@ static int pasta_netns_quit_timer(void) */ void pasta_netns_quit_init(const struct ctx *c) { - struct epoll_event ev = { .events = 
EPOLLIN }; int flags = O_NONBLOCK | O_CLOEXEC; struct statfs s = { 0 }; bool try_inotify = true; @@ -487,8 +486,8 @@ void pasta_netns_quit_init(const struct ctx *c) die("netns monitor file number %i too big, exiting", fd); ref.fd = fd; - ev.data.u64 = ref.u64; - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev); + + epoll_add(c->epollfd, EPOLLIN, ref); } /** @@ -22,6 +22,7 @@ #include "inany.h" #include "flow.h" #include "flow_table.h" +#include "epoll_ctl.h" #include "repair.h" @@ -47,7 +48,6 @@ static int repair_nfds; void repair_sock_init(const struct ctx *c) { union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR_LISTEN }; - struct epoll_event ev = { 0 }; if (c->fd_repair_listen == -1) return; @@ -58,10 +58,8 @@ void repair_sock_init(const struct ctx *c) } ref.fd = c->fd_repair_listen; - ev.events = EPOLLIN | EPOLLHUP | EPOLLET; - ev.data.u64 = ref.u64; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair_listen, &ev)) - err_perror("repair helper socket epoll_ctl(), won't migrate"); + if (epoll_add(c->epollfd, EPOLLIN | EPOLLHUP | EPOLLET, ref)) + err("repair helper socket epoll_ctl(), won't migrate"); } /** @@ -74,7 +72,6 @@ void repair_sock_init(const struct ctx *c) int repair_listen_handler(struct ctx *c, uint32_t events) { union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR }; - struct epoll_event ev = { 0 }; struct ucred ucred; socklen_t len; int rc; @@ -112,11 +109,10 @@ int repair_listen_handler(struct ctx *c, uint32_t events) info("Accepted TCP_REPAIR helper, PID %i", ucred.pid); ref.fd = c->fd_repair; - ev.events = EPOLLHUP | EPOLLET; - ev.data.u64 = ref.u64; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair, &ev)) { - rc = errno; - debug_perror("epoll_ctl() on TCP_REPAIR helper socket"); + + rc = epoll_add(c->epollfd, EPOLLHUP | EPOLLET, ref); + if (rc < 0) { + debug("epoll_ctl() on TCP_REPAIR helper socket"); close(c->fd_repair); c->fd_repair = -1; return rc; @@ -26,7 +26,6 @@ #include <netinet/in.h> #include <arpa/inet.h> #include <stdint.h> 
-#include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/types.h> #include <sys/stat.h> @@ -61,6 +60,7 @@ #include "log.h" #include "vhost_user.h" #include "vu_common.h" +#include "epoll_ctl.h" /* Maximum allowed frame lengths (including L2 header) */ @@ -1331,14 +1331,12 @@ static void tap_backend_show_hints(struct ctx *c) static void tap_sock_unix_init(const struct ctx *c) { union epoll_ref ref = { .type = EPOLL_TYPE_TAP_LISTEN }; - struct epoll_event ev = { 0 }; listen(c->fd_tap_listen, 0); ref.fd = c->fd_tap_listen; - ev.events = EPOLLIN | EPOLLET; - ev.data.u64 = ref.u64; - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev); + + epoll_add(c->epollfd, EPOLLIN | EPOLLET, ref); } /** @@ -1347,7 +1345,6 @@ static void tap_sock_unix_init(const struct ctx *c) */ static void tap_start_connection(const struct ctx *c) { - struct epoll_event ev = { 0 }; union epoll_ref ref = { 0 }; ref.fd = c->fd_tap; @@ -1363,9 +1360,7 @@ static void tap_start_connection(const struct ctx *c) break; } - ev.events = EPOLLIN | EPOLLRDHUP; - ev.data.u64 = ref.u64; - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); + epoll_add(c->epollfd, EPOLLIN | EPOLLRDHUP, ref); if (c->ifi4) arp_send_init_req(c); @@ -279,7 +279,6 @@ #include <stdbool.h> #include <stddef.h> #include <string.h> -#include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <sys/timerfd.h> @@ -309,6 +308,7 @@ #include "tcp_internal.h" #include "tcp_buf.h" #include "tcp_vu.h" +#include "epoll_ctl.h" /* * The size of TCP header (including options) is given by doff (Data Offset) diff --git a/tcp_splice.c b/tcp_splice.c index 666ee62..6f21184 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -44,7 +44,6 @@ #include <net/ethernet.h> #include <netinet/in.h> #include <netinet/tcp.h> -#include <sys/epoll.h> #include <sys/types.h> #include <sys/socket.h> @@ -56,6 +55,7 @@ #include "siphash.h" #include "inany.h" #include "flow.h" +#include "epoll_ctl.h" #include "flow_table.h" @@ -94,7 +94,6 @@ 
#include <stdint.h> #include <stddef.h> #include <string.h> -#include <sys/epoll.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/uio.h> @@ -115,6 +114,7 @@ #include "flow_table.h" #include "udp_internal.h" #include "udp_vu.h" +#include "epoll_ctl.h" #define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ @@ -15,6 +15,7 @@ #include "passt.h" #include "flow_table.h" #include "udp_internal.h" +#include "epoll_ctl.h" #define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ @@ -18,7 +18,6 @@ #include <unistd.h> #include <arpa/inet.h> #include <net/ethernet.h> -#include <sys/epoll.h> #include <sys/uio.h> #include <fcntl.h> #include <string.h> @@ -35,6 +34,7 @@ #include "packet.h" #include "log.h" #include "pcap.h" +#include "epoll_ctl.h" #ifdef HAS_GETRANDOM #include <sys/random.h> #endif @@ -58,7 +58,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, sa_family_t af = ((const struct sockaddr *)sa)->sa_family; union epoll_ref ref = { .type = type, .data = data }; bool freebind = false; - struct epoll_event ev; int fd, y = 1, ret; uint8_t proto; int socktype; @@ -172,13 +171,9 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, return ret; } - ev.events = EPOLLIN; - ev.data.u64 = ref.u64; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { - ret = -errno; - warn("L4 epoll_ctl: %s", strerror_(-ret)); + ret = epoll_add(c->epollfd, EPOLLIN, ref); + if (ret < 0) return ret; - } return fd; } @@ -995,17 +990,6 @@ void raw_random(void *buf, size_t buflen) } /** - * epoll_del() - Remove a file descriptor from our passt epoll - * @epollfd: epoll file descriptor to remove from - * @fd: File descriptor to remove - */ -void epoll_del(int epollfd, int fd) -{ - epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); - -} - -/** * encode_domain_name() - Encode domain name according to RFC 1035, section 3.1 * @buf: Buffer to fill in with encoded domain name * @domain_name: Input domain name string with terminator @@ -195,6 
+195,9 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, #define SNDBUF_BIG (4ULL * 1024 * 1024) #define SNDBUF_SMALL (128ULL * 1024) +#define FD_REF_BITS 24 +#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS)) + #include <net/if.h> #include <limits.h> #include <stdint.h> @@ -302,7 +305,6 @@ static inline bool mod_between(unsigned x, unsigned i, unsigned j, unsigned m) #define FPRINTF(f, ...) (void)fprintf(f, __VA_ARGS__) void raw_random(void *buf, size_t buflen); -void epoll_del(int epollfd, int fd); /* * Starting from glibc 2.40.9000 and commit 25a5eb4010df ("string: strerror, diff --git a/vhost_user.c b/vhost_user.c index f8324c5..aa7c869 100644 --- a/vhost_user.c +++ b/vhost_user.c @@ -32,8 +32,6 @@ #include <inttypes.h> #include <time.h> #include <net/ethernet.h> -#include <netinet/in.h> -#include <sys/epoll.h> #include <sys/eventfd.h> #include <sys/mman.h> #include <linux/vhost_types.h> @@ -45,6 +43,7 @@ #include "vhost_user.h" #include "pcap.h" #include "migrate.h" +#include "epoll_ctl.h" /* vhost-user version we are compatible with */ #define VHOST_USER_VERSION 1 @@ -753,11 +752,8 @@ static void vu_set_watch(const struct vu_dev *vdev, int idx) .fd = vdev->vq[idx].kick_fd, .queue = idx }; - struct epoll_event ev = { 0 }; - ev.data.u64 = ref.u64; - ev.events = EPOLLIN; - epoll_ctl(vdev->context->epollfd, EPOLL_CTL_ADD, ref.fd, &ev); + epoll_add(vdev->context->epollfd, EPOLLIN, ref); } /** diff --git a/vu_common.c b/vu_common.c index b716070..b13b7c3 100644 --- a/vu_common.c +++ b/vu_common.c @@ -6,7 +6,6 @@ */ #include <errno.h> -#include <unistd.h> #include <sys/uio.h> #include <sys/eventfd.h> #include <netinet/if_ether.h> @@ -19,6 +18,7 @@ #include "pcap.h" #include "vu_common.h" #include "migrate.h" +#include "epoll_ctl.h" #define VU_MAX_TX_BUFFER_NB 2 |
