aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2025-06-04 18:23:13 +0200
committerStefano Brivio <sbrivio@redhat.com>2025-06-06 10:46:40 +0200
commit754c6d728686c5d115bd97c628d53733776dd711 (patch)
tree89d74da6d0b937e97041b705dbbbc4954d746ee8
parenta2088fef360ee262c19186470d63875b32f80917 (diff)
downloadpasst-754c6d728686c5d115bd97c628d53733776dd711.tar
passt-754c6d728686c5d115bd97c628d53733776dd711.tar.gz
passt-754c6d728686c5d115bd97c628d53733776dd711.tar.bz2
passt-754c6d728686c5d115bd97c628d53733776dd711.tar.lz
passt-754c6d728686c5d115bd97c628d53733776dd711.tar.xz
passt-754c6d728686c5d115bd97c628d53733776dd711.tar.zst
passt-754c6d728686c5d115bd97c628d53733776dd711.zip
flow, repair: Proper error handling for missing passt-repair helper on target2025_06_06.754c6d7
If the target doesn't have a connected passt-repair helper, repair_wait() sets a timeout and forcibly calls accept4(), but if the timeout expires, we don't report any error and proceed as if we had a connected helper. Later, in repair_flush(), sendmsg() will fail for each flow we try to migrate with EBADF, and we'll handle that gracefully, but it makes no sense to even try, and it also leads to noise in the logs. Similarly to how we handle a missing repair helper in flow_migrate_repair_all(), propagate timeout and other errors through repair_wait(), and don't attempt to migrate flows if repair_wait() failed. Fixes: c8b520c0625b ("flow, repair: Wait for a short while for passt-repair to connect") Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--flow.c3
-rw-r--r--repair.c38
-rw-r--r--repair.h4
3 files changed, 32 insertions, 13 deletions
diff --git a/flow.c b/flow.c
index cc05033..da5c813 100644
--- a/flow.c
+++ b/flow.c
@@ -1130,7 +1130,8 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
if (!count)
return 0;
- repair_wait(c);
+ if ((rc = repair_wait(c)))
+ return -rc;
if ((rc = flow_migrate_repair_all(c, true)))
return -rc;
diff --git a/repair.c b/repair.c
index 149fe51..f6b1bf3 100644
--- a/repair.c
+++ b/repair.c
@@ -68,18 +68,21 @@ void repair_sock_init(const struct ctx *c)
* repair_listen_handler() - Handle events on TCP_REPAIR helper listening socket
* @c: Execution context
* @events: epoll events
+ *
+ * Return: 0 on valid event with new connected socket, error code on failure
*/
-void repair_listen_handler(struct ctx *c, uint32_t events)
+int repair_listen_handler(struct ctx *c, uint32_t events)
{
union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR };
struct epoll_event ev = { 0 };
struct ucred ucred;
socklen_t len;
+ int rc;
if (events != EPOLLIN) {
debug("Spurious event 0x%04x on TCP_REPAIR helper socket",
events);
- return;
+ return EINVAL;
}
len = sizeof(ucred);
@@ -90,18 +93,19 @@ void repair_listen_handler(struct ctx *c, uint32_t events)
SOCK_NONBLOCK);
if (discard == -1)
- return;
+ return errno;
if (!getsockopt(discard, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
info("Discarding TCP_REPAIR helper, PID %i", ucred.pid);
close(discard);
- return;
+ return EEXIST;
}
if ((c->fd_repair = accept4(c->fd_repair_listen, NULL, NULL, 0)) < 0) {
+ rc = errno;
debug_perror("accept4() on TCP_REPAIR helper listening socket");
- return;
+ return rc;
}
if (!getsockopt(c->fd_repair, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
@@ -111,10 +115,14 @@ void repair_listen_handler(struct ctx *c, uint32_t events)
ev.events = EPOLLHUP | EPOLLET;
ev.data.u64 = ref.u64;
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair, &ev)) {
+ rc = errno;
debug_perror("epoll_ctl() on TCP_REPAIR helper socket");
close(c->fd_repair);
c->fd_repair = -1;
+ return rc;
}
+
+ return 0;
}
/**
@@ -145,29 +153,39 @@ void repair_handler(struct ctx *c, uint32_t events)
/**
* repair_wait() - Wait (with timeout) for TCP_REPAIR helper to connect
* @c: Execution context
+ *
+ * Return: 0 on success or if already connected, error code on failure
*/
-void repair_wait(struct ctx *c)
+int repair_wait(struct ctx *c)
{
struct timeval tv = { .tv_sec = 0,
.tv_usec = (long)(REPAIR_ACCEPT_TIMEOUT_US) };
+ int rc;
+
static_assert(REPAIR_ACCEPT_TIMEOUT_US < 1000 * 1000,
".tv_usec is greater than 1000 * 1000");
- if (c->fd_repair >= 0 || c->fd_repair_listen == -1)
- return;
+ if (c->fd_repair >= 0)
+ return 0;
+
+ if (c->fd_repair_listen == -1)
+ return ENOENT;
if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
&tv, sizeof(tv))) {
+ rc = errno;
err_perror("Set timeout on TCP_REPAIR listening socket");
- return;
+ return rc;
}
- repair_listen_handler(c, EPOLLIN);
+ rc = repair_listen_handler(c, EPOLLIN);
tv.tv_usec = 0;
if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
&tv, sizeof(tv)))
err_perror("Clear timeout on TCP_REPAIR listening socket");
+
+ return rc;
}
/**
diff --git a/repair.h b/repair.h
index 1d37922..ab27e67 100644
--- a/repair.h
+++ b/repair.h
@@ -7,10 +7,10 @@
#define REPAIR_H
void repair_sock_init(const struct ctx *c);
-void repair_listen_handler(struct ctx *c, uint32_t events);
+int repair_listen_handler(struct ctx *c, uint32_t events);
void repair_handler(struct ctx *c, uint32_t events);
void repair_close(struct ctx *c);
-void repair_wait(struct ctx *c);
+int repair_wait(struct ctx *c);
int repair_flush(struct ctx *c);
int repair_set(struct ctx *c, int s, int cmd);