From 965ea66068e653934c0016281df86c17e2a65625 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 21 Oct 2025 23:01:11 +0200 Subject: epoll_ctl: Extract epoll operations Centralize epoll_add() and epoll_del() helper functions into new epoll_ctl.c/h files. This also moves the union epoll_ref definition from passt.h to epoll_ctl.h where it's more logically placed. The new epoll_add() helper simplifies adding file descriptors to epoll by taking an epoll_ref and events, handling error reporting consistently across all call sites. Signed-off-by: Laurent Vivier [sbrivio: Include epoll_ctl.h from netlink.c as it's now needed there] Reviewed-by: David Gibson Signed-off-by: Stefano Brivio --- Makefile | 22 +++++++++++----------- epoll_ctl.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ epoll_ctl.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ icmp.c | 4 +--- netlink.c | 1 + passt.c | 2 +- passt.h | 34 ---------------------------------- pasta.c | 7 +++---- repair.c | 18 +++++++----------- tap.c | 13 ++++--------- tcp.c | 2 +- tcp_splice.c | 2 +- udp.c | 2 +- udp_flow.c | 1 + util.c | 22 +++------------------- util.h | 4 +++- vhost_user.c | 8 ++------ vu_common.c | 2 +- 18 files changed, 137 insertions(+), 103 deletions(-) create mode 100644 epoll_ctl.c create mode 100644 epoll_ctl.h diff --git a/Makefile b/Makefile index 3328f83..91e037b 100644 --- a/Makefile +++ b/Makefile @@ -37,23 +37,23 @@ FLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE) FLAGS += -DVERSION=\"$(VERSION)\" FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) -PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ - icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \ - ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c \ - repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c \ - udp_vu.c util.c vhost_user.c virtio.c vu_common.c +PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c epoll_ctl.c \ + flow.c fwd.c 
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c \ + log.c mld.c ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c \ + pif.c repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c \ + udp_flow.c udp_vu.c util.c vhost_user.c virtio.c vu_common.c QRAP_SRCS = qrap.c PASST_REPAIR_SRCS = passt-repair.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1 -PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \ - flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \ - lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \ - pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h \ - tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h \ - udp_vu.h util.h vhost_user.h virtio.h vu_common.h +PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h epoll_ctl.h \ + flow.h fwd.h flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h \ + isolation.h lineread.h log.h migrate.h ndp.h netlink.h packet.h \ + passt.h pasta.h pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h \ + tcp_conn.h tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h \ + udp_internal.h udp_vu.h util.h vhost_user.h virtio.h vu_common.h HEADERS = $(PASST_HEADERS) seccomp.h C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);} diff --git a/epoll_ctl.c b/epoll_ctl.c new file mode 100644 index 0000000..728a2af --- /dev/null +++ b/epoll_ctl.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* epoll_ctl.c - epoll manipulation helpers + * + * Copyright Red Hat + * Author: Laurent Vivier + */ + +#include <errno.h> + +#include "epoll_ctl.h" + +/** + * epoll_add() - Add a file descriptor to an epollfd + * @epollfd: epoll file descriptor to add to + * @events: epoll events + * @ref: epoll reference for the file descriptor (includes fd and metadata) + * + * Return: 0 on success, negative errno on failure + */ +int epoll_add(int epollfd, uint32_t events, union 
epoll_ref ref) +{ + struct epoll_event ev; + int ret; + + ev.events = events; + ev.data.u64 = ref.u64; + + ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, ref.fd, &ev); + if (ret == -1) { + ret = -errno; + err("Failed to add fd to epoll: %s", strerror_(-ret)); + } + + return ret; +} + +/** + * epoll_del() - Remove a file descriptor from an epollfd + * @epollfd: epoll file descriptor to remove from + * @fd: File descriptor to remove + */ +void epoll_del(int epollfd, int fd) +{ + epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); +} diff --git a/epoll_ctl.h b/epoll_ctl.h new file mode 100644 index 0000000..2d7e712 --- /dev/null +++ b/epoll_ctl.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: Laurent Vivier + */ + +#ifndef EPOLL_CTL_H +#define EPOLL_CTL_H + +#include <sys/epoll.h> + +#include "util.h" +#include "passt.h" +#include "epoll_type.h" +#include "flow.h" +#include "tcp.h" +#include "udp.h" + +/** + * union epoll_ref - Breakdown of reference for epoll fd bookkeeping + * @type: Type of fd (tells us what to do with events) + * @fd: File descriptor number (implies < 2^24 total descriptors) + * @flow: Index of the flow this fd is linked to + * @tcp_listen: TCP-specific reference part for listening sockets + * @udp: UDP-specific reference part + * @data: Data handled by protocol handlers + * @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone + * @queue: vhost-user queue index for this fd + * @u64: Opaque reference for epoll_ctl() and epoll_wait() + */ +union epoll_ref { + struct { + enum epoll_type type:8; + int32_t fd:FD_REF_BITS; + union { + uint32_t flow; + flow_sidx_t flowside; + union tcp_listen_epoll_ref tcp_listen; + union udp_listen_epoll_ref udp; + uint32_t data; + int nsdir_fd; + int queue; + }; + }; + uint64_t u64; +}; +static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), + "epoll_ref must have same size as epoll_data"); + +int epoll_add(int epollfd, uint32_t events, union epoll_ref ref); +void 
epoll_del(int epollfd, int fd); +#endif /* EPOLL_CTL_H */ diff --git a/icmp.c b/icmp.c index d2fd7c8..fb7d1c7 100644 --- a/icmp.c +++ b/icmp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -23,10 +22,8 @@ #include #include #include -#include #include #include -#include #include #include @@ -41,6 +38,7 @@ #include "inany.h" #include "icmp.h" #include "flow_table.h" +#include "epoll_ctl.h" #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */ #define ICMP_NUM_IDS (1U << 16) diff --git a/netlink.c b/netlink.c index 55d3207..82a2f0c 100644 --- a/netlink.c +++ b/netlink.c @@ -36,6 +36,7 @@ #include "log.h" #include "ip.h" #include "netlink.h" +#include "epoll_ctl.h" /* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */ #define RTNH_NEXT_AND_DEC(rtnh, attrlen) \ diff --git a/passt.c b/passt.c index 0b9c88a..2e96d04 100644 --- a/passt.c +++ b/passt.c @@ -19,7 +19,6 @@ * created in a separate network namespace). */ -#include #include #include #include @@ -54,6 +53,7 @@ #include "migrate.h" #include "repair.h" #include "netlink.h" +#include "epoll_ctl.h" #define NUM_EPOLL_EVENTS 8 diff --git a/passt.h b/passt.h index ff0236c..15801b4 100644 --- a/passt.h +++ b/passt.h @@ -35,40 +35,6 @@ union epoll_ref; #define MAC_OUR_LAA \ ((uint8_t [ETH_ALEN]){0x9a, 0x55, 0x9a, 0x55, 0x9a, 0x55}) -/** - * union epoll_ref - Breakdown of reference for epoll fd bookkeeping - * @type: Type of fd (tells us what to do with events) - * @fd: File descriptor number (implies < 2^24 total descriptors) - * @flow: Index of the flow this fd is linked to - * @tcp_listen: TCP-specific reference part for listening sockets - * @udp: UDP-specific reference part - * @icmp: ICMP-specific reference part - * @data: Data handled by protocol handlers - * @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone - * @queue: vhost-user queue index for this fd - * @u64: Opaque reference for epoll_ctl() and epoll_wait() - 
*/ -union epoll_ref { - struct { - enum epoll_type type:8; -#define FD_REF_BITS 24 -#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS)) - int32_t fd:FD_REF_BITS; - union { - uint32_t flow; - flow_sidx_t flowside; - union tcp_listen_epoll_ref tcp_listen; - union udp_listen_epoll_ref udp; - uint32_t data; - int nsdir_fd; - int queue; - }; - }; - uint64_t u64; -}; -static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), - "epoll_ref must have same size as epoll_data"); - /* Large enough for ~128 maximum size frames */ #define PKT_BUF_BYTES (8UL << 20) diff --git a/pasta.c b/pasta.c index a42cfd8..674b554 100644 --- a/pasta.c +++ b/pasta.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -49,6 +48,7 @@ #include "isolation.h" #include "netlink.h" #include "log.h" +#include "epoll_ctl.h" #define HOSTNAME_PREFIX "pasta-" @@ -444,7 +444,6 @@ static int pasta_netns_quit_timer(void) */ void pasta_netns_quit_init(const struct ctx *c) { - struct epoll_event ev = { .events = EPOLLIN }; int flags = O_NONBLOCK | O_CLOEXEC; struct statfs s = { 0 }; bool try_inotify = true; @@ -487,8 +486,8 @@ void pasta_netns_quit_init(const struct ctx *c) die("netns monitor file number %i too big, exiting", fd); ref.fd = fd; - ev.data.u64 = ref.u64; - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev); + + epoll_add(c->epollfd, EPOLLIN, ref); } /** diff --git a/repair.c b/repair.c index f6b1bf3..69c5307 100644 --- a/repair.c +++ b/repair.c @@ -22,6 +22,7 @@ #include "inany.h" #include "flow.h" #include "flow_table.h" +#include "epoll_ctl.h" #include "repair.h" @@ -47,7 +48,6 @@ static int repair_nfds; void repair_sock_init(const struct ctx *c) { union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR_LISTEN }; - struct epoll_event ev = { 0 }; if (c->fd_repair_listen == -1) return; @@ -58,10 +58,8 @@ void repair_sock_init(const struct ctx *c) } ref.fd = c->fd_repair_listen; - ev.events = EPOLLIN | EPOLLHUP | EPOLLET; - ev.data.u64 = ref.u64; - if 
(epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair_listen, &ev)) - err_perror("repair helper socket epoll_ctl(), won't migrate"); + if (epoll_add(c->epollfd, EPOLLIN | EPOLLHUP | EPOLLET, ref)) + err("repair helper socket epoll_ctl(), won't migrate"); } /** @@ -74,7 +72,6 @@ void repair_sock_init(const struct ctx *c) int repair_listen_handler(struct ctx *c, uint32_t events) { union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR }; - struct epoll_event ev = { 0 }; struct ucred ucred; socklen_t len; int rc; @@ -112,11 +109,10 @@ int repair_listen_handler(struct ctx *c, uint32_t events) info("Accepted TCP_REPAIR helper, PID %i", ucred.pid); ref.fd = c->fd_repair; - ev.events = EPOLLHUP | EPOLLET; - ev.data.u64 = ref.u64; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair, &ev)) { - rc = errno; - debug_perror("epoll_ctl() on TCP_REPAIR helper socket"); + + rc = epoll_add(c->epollfd, EPOLLHUP | EPOLLET, ref); + if (rc < 0) { + debug("epoll_ctl() on TCP_REPAIR helper socket"); close(c->fd_repair); c->fd_repair = -1; return rc; diff --git a/tap.c b/tap.c index 114dade..bb139d6 100644 --- a/tap.c +++ b/tap.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -61,6 +60,7 @@ #include "log.h" #include "vhost_user.h" #include "vu_common.h" +#include "epoll_ctl.h" /* Maximum allowed frame lengths (including L2 header) */ @@ -1331,14 +1331,12 @@ static void tap_backend_show_hints(struct ctx *c) static void tap_sock_unix_init(const struct ctx *c) { union epoll_ref ref = { .type = EPOLL_TYPE_TAP_LISTEN }; - struct epoll_event ev = { 0 }; listen(c->fd_tap_listen, 0); ref.fd = c->fd_tap_listen; - ev.events = EPOLLIN | EPOLLET; - ev.data.u64 = ref.u64; - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev); + + epoll_add(c->epollfd, EPOLLIN | EPOLLET, ref); } /** @@ -1347,7 +1345,6 @@ static void tap_sock_unix_init(const struct ctx *c) */ static void tap_start_connection(const struct ctx *c) { - struct epoll_event ev = { 0 }; union epoll_ref 
ref = { 0 }; ref.fd = c->fd_tap; @@ -1363,9 +1360,7 @@ static void tap_start_connection(const struct ctx *c) break; } - ev.events = EPOLLIN | EPOLLRDHUP; - ev.data.u64 = ref.u64; - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); + epoll_add(c->epollfd, EPOLLIN | EPOLLRDHUP, ref); if (c->ifi4) arp_send_init_req(c); diff --git a/tcp.c b/tcp.c index 398785c..48dec5a 100644 --- a/tcp.c +++ b/tcp.c @@ -279,7 +279,6 @@ #include #include #include -#include #include #include #include @@ -309,6 +308,7 @@ #include "tcp_internal.h" #include "tcp_buf.h" #include "tcp_vu.h" +#include "epoll_ctl.h" /* * The size of TCP header (including options) is given by doff (Data Offset) diff --git a/tcp_splice.c b/tcp_splice.c index 666ee62..6f21184 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include @@ -56,6 +55,7 @@ #include "siphash.h" #include "inany.h" #include "flow.h" +#include "epoll_ctl.h" #include "flow_table.h" diff --git a/udp.c b/udp.c index 3981e9b..f052a0c 100644 --- a/udp.c +++ b/udp.c @@ -94,7 +94,6 @@ #include #include #include -#include #include #include #include @@ -115,6 +114,7 @@ #include "flow_table.h" #include "udp_internal.h" #include "udp_vu.h" +#include "epoll_ctl.h" #define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ diff --git a/udp_flow.c b/udp_flow.c index 84973f8..d9c75f1 100644 --- a/udp_flow.c +++ b/udp_flow.c @@ -15,6 +15,7 @@ #include "passt.h" #include "flow_table.h" #include "udp_internal.h" +#include "epoll_ctl.h" #define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ diff --git a/util.c b/util.c index 1067486..e3f24f7 100644 --- a/util.c +++ b/util.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include "packet.h" #include "log.h" #include "pcap.h" +#include "epoll_ctl.h" #ifdef HAS_GETRANDOM #include #endif @@ -58,7 +58,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, 
sa_family_t af = ((const struct sockaddr *)sa)->sa_family; union epoll_ref ref = { .type = type, .data = data }; bool freebind = false; - struct epoll_event ev; int fd, y = 1, ret; uint8_t proto; int socktype; @@ -172,13 +171,9 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, return ret; } - ev.events = EPOLLIN; - ev.data.u64 = ref.u64; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { - ret = -errno; - warn("L4 epoll_ctl: %s", strerror_(-ret)); + ret = epoll_add(c->epollfd, EPOLLIN, ref); + if (ret < 0) return ret; - } return fd; } @@ -994,17 +989,6 @@ void raw_random(void *buf, size_t buflen) die("Unexpected EOF on random data source"); } -/** - * epoll_del() - Remove a file descriptor from our passt epoll - * @epollfd: epoll file descriptor to remove from - * @fd: File descriptor to remove - */ -void epoll_del(int epollfd, int fd) -{ - epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); - -} - /** * encode_domain_name() - Encode domain name according to RFC 1035, section 3.1 * @buf: Buffer to fill in with encoded domain name diff --git a/util.h b/util.h index 743f0b0..fda0375 100644 --- a/util.h +++ b/util.h @@ -195,6 +195,9 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, #define SNDBUF_BIG (4ULL * 1024 * 1024) #define SNDBUF_SMALL (128ULL * 1024) +#define FD_REF_BITS 24 +#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS)) + #include #include #include @@ -302,7 +305,6 @@ static inline bool mod_between(unsigned x, unsigned i, unsigned j, unsigned m) #define FPRINTF(f, ...) 
(void)fprintf(f, __VA_ARGS__) void raw_random(void *buf, size_t buflen); -void epoll_del(int epollfd, int fd); /* * Starting from glibc 2.40.9000 and commit 25a5eb4010df ("string: strerror, diff --git a/vhost_user.c b/vhost_user.c index f8324c5..aa7c869 100644 --- a/vhost_user.c +++ b/vhost_user.c @@ -32,8 +32,6 @@ #include #include #include -#include -#include #include #include #include @@ -45,6 +43,7 @@ #include "vhost_user.h" #include "pcap.h" #include "migrate.h" +#include "epoll_ctl.h" /* vhost-user version we are compatible with */ #define VHOST_USER_VERSION 1 @@ -753,11 +752,8 @@ static void vu_set_watch(const struct vu_dev *vdev, int idx) .fd = vdev->vq[idx].kick_fd, .queue = idx }; - struct epoll_event ev = { 0 }; - ev.data.u64 = ref.u64; - ev.events = EPOLLIN; - epoll_ctl(vdev->context->epollfd, EPOLL_CTL_ADD, ref.fd, &ev); + epoll_add(vdev->context->epollfd, EPOLLIN, ref); } /** diff --git a/vu_common.c b/vu_common.c index b716070..b13b7c3 100644 --- a/vu_common.c +++ b/vu_common.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -19,6 +18,7 @@ #include "pcap.h" #include "vu_common.h" #include "migrate.h" +#include "epoll_ctl.h" #define VU_MAX_TX_BUFFER_NB 2 -- cgit v1.2.3