From 0c6c20dee5c24bd324834a99f409ad43c50812ae Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 4 Sep 2024 18:28:49 +0200 Subject: udp, udp_flow: Add instrumentation, handle EPOLLERR without queued errors Link: https://github.com/containers/podman/issues/23686#issuecomment-2324945010 Signed-off-by: Stefano Brivio --- udp.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++----------- udp_flow.c | 7 ++++-- udp_flow.h | 3 +++ 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/udp.c b/udp.c index bd9051e..0232635 100644 --- a/udp.c +++ b/udp.c @@ -387,11 +387,11 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, * udp_sock_recverr() - Receive and clear an error from a socket * @s: Socket to receive from * - * Return: true if errors received and processed, false if no more errors + * Return: ee_errno, 0 on empty queue * * #syscalls recvmsg */ -static bool udp_sock_recverr(int s) +static int udp_sock_recverr(int s) { const struct sock_extended_err *ee; const struct cmsghdr *hdr; @@ -410,12 +410,13 @@ static bool udp_sock_recverr(int s) if (rc < 0) { if (errno != EAGAIN && errno != EWOULDBLOCK) err_perror("Failed to read error queue"); - return false; + + return 0; } if (!(mh.msg_flags & MSG_ERRQUEUE)) { err("Missing MSG_ERRQUEUE flag reading error queue"); - return false; + return 0; } hdr = CMSG_FIRSTHDR(&mh); @@ -424,7 +425,7 @@ static bool udp_sock_recverr(int s) (hdr->cmsg_level == IPPROTO_IPV6 && hdr->cmsg_type == IPV6_RECVERR))) { err("Unexpected cmsg reading error queue"); - return false; + return 0; } ee = (const struct sock_extended_err *)CMSG_DATA(hdr); @@ -433,7 +434,7 @@ static bool udp_sock_recverr(int s) debug("%s error on UDP socket %i: %s", str_ee_origin(ee), s, strerror(ee->ee_errno)); - return true; + return ee->ee_errno; } /** @@ -441,12 +442,15 @@ static bool udp_sock_recverr(int s) * @c: Execution context * @s: Socket to receive from * @events: epoll events bitmap - * @mmh mmsghdr array to receive into + * @mmh: mmsghdr array to receive into + * @recv_err: Set to last error in queue. If none: -1 on EPOLLERR, 0 otherwise + * + * Return: count of datagrams received * * #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64 */ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events, - struct mmsghdr *mmh) + struct mmsghdr *mmh, int *recv_err) { /* For not entirely clear reasons (data locality?) pasta gets better * throughput if we receive tap datagrams one at a atime. For small @@ -461,8 +465,13 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events, /* Clear any errors first */ if (events & EPOLLERR) { - while (udp_sock_recverr(s)) - ; + bool found = false; + int ret; + + while ((ret = udp_sock_recverr(s))) + found = true; + + *recv_err = found ? ret : -1; } if (!(events & EPOLLIN)) @@ -490,9 +499,10 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now) { const socklen_t sasize = sizeof(udp_meta[0].s_in); + int recv_err = 0; int n, i; - if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0) + if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv, &recv_err)) <= 0) return; /* We divide datagrams into batches based on how we need to send them, @@ -562,11 +572,49 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, struct udp_flow *uflow = udp_at_sidx(ref.flowside); int from_s = uflow->s[ref.flowside.sidei]; uint8_t topif = pif_at_sidx(tosidx); + int recv_err = 0; int n, i; ASSERT(!c->no_udp && uflow); - if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0) + n = udp_sock_recv(c, from_s, events, udp_mh_recv, &recv_err); + if (recv_err == -1) { + struct flow_common *f = &uflow->f; + char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN]; + char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN]; + const struct flowside *ini = &f->side[INISIDE]; + const struct flowside *tgt = &f->side[TGTSIDE]; + + flow_err(uflow, "EPOLLERR without error queue, closing flow"); + err("Last recorded errno was: %i (%s)", uflow->last_errno, + strerror(uflow->last_errno)); + + flow_log_(f, LOG_ERR, + "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu", + pif_name(f->pif[INISIDE]), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), + ini->eport, + inany_ntop(&ini->oaddr, fstr0, sizeof(fstr0)), + ini->oport, + pif_name(f->pif[TGTSIDE]), + inany_ntop(&tgt->oaddr, fstr1, sizeof(fstr1)), + tgt->oport, + inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)), + tgt->eport); + + udp_flow_close(c, uflow); + return; + } + + if (recv_err) { + struct udp_flow *uflow = udp_at_sidx(udp_meta[0].tosidx); + + uflow->last_errno = recv_err; + flow_err(uflow, "Recorded errno %i (%s)", recv_err, + strerror(recv_err)); + } + + if (n <= 0) return; flow_trace(uflow, "Received %d datagrams on reply socket", n); diff --git a/udp_flow.c b/udp_flow.c index 1ff59a9..a655644 100644 --- a/udp_flow.c +++ b/udp_flow.c @@ -34,12 +34,12 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx) return &flow->udp; } -/* +/** * udp_flow_close() - Close and clean up UDP flow * @c: Execution context * @uflow: UDP flow */ -static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) +void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) { if (uflow->s[INISIDE] >= 0) { /* The listening socket needs to stay in epoll */ @@ -53,6 +53,9 @@ static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) close(uflow->s[TGTSIDE]); uflow->s[TGTSIDE] = -1; } + + uflow->last_errno = 0; + flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE)); if (!pif_is_socket(uflow->f.pif[TGTSIDE])) flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE)); diff --git a/udp_flow.h b/udp_flow.h index 12ddf03..6996d24 100644 --- a/udp_flow.h +++ b/udp_flow.h @@ -19,6 +19,8 @@ struct udp_flow { time_t ts; int s[SIDES]; + + int last_errno; }; struct udp_flow *udp_at_sidx(flow_sidx_t sidx); @@ -30,6 +32,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, const void *saddr, const void *daddr, in_port_t srcport, in_port_t dstport, const struct timespec *now); +void udp_flow_close(const struct ctx *c, struct udp_flow *uflow); bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, const struct timespec *now); -- cgit v1.2.3