aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorJon Maloy <jmaloy@redhat.com>2025-03-06 13:00:04 -0500
committerStefano Brivio <sbrivio@redhat.com>2025-03-07 02:21:19 +0100
commit55431f0077b6a25c264bd2492680d7f99815cc5f (patch)
treec307a2a0cc92186af68275fb113068523c6044dc
parent82a839be988ecfdb013b5823afc93211200a9f55 (diff)
downloadpasst-55431f0077b6a25c264bd2492680d7f99815cc5f.tar
passt-55431f0077b6a25c264bd2492680d7f99815cc5f.tar.gz
passt-55431f0077b6a25c264bd2492680d7f99815cc5f.tar.bz2
passt-55431f0077b6a25c264bd2492680d7f99815cc5f.tar.lz
passt-55431f0077b6a25c264bd2492680d7f99815cc5f.tar.xz
passt-55431f0077b6a25c264bd2492680d7f99815cc5f.tar.zst
passt-55431f0077b6a25c264bd2492680d7f99815cc5f.zip
udp: create and send ICMPv4 to local peer when applicable
When a local peer sends a UDP message to a non-existing port on an existing remote host, that host will return an ICMP message containing the error code ICMP_PORT_UNREACH, plus the header and the first eight bytes of the original message. If the sender socket has been connected, it uses this message to issue a "Connection Refused" event to the user. Until now, we have only read such events from the externally facing socket, but we don't forward them back to the local sender because we cannot read the ICMP message directly to user space. Because of this, the local peer will hang and wait for a response that never arrives. We now fix this for IPv4 by recreating and forwarding a correct ICMP message back to the internal sender. We synthesize the message based on the information in the extended error structure, plus the returned part of the original message body. Note that for the sake of completeness, we even produce ICMP messages for other error codes. We have noticed that at least ICMP_PROT_UNREACH is propagated as an error event back to the user. Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Jon Maloy <jmaloy@redhat.com> [sbrivio: fix cppcheck warning: udp_send_conn_fail_icmp4() doesn't modify 'in', it can be declared as const] Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--tap.c2
-rw-r--r--tap.h2
-rw-r--r--udp.c87
-rw-r--r--udp_internal.h2
-rw-r--r--udp_vu.c4
5 files changed, 81 insertions, 16 deletions
diff --git a/tap.c b/tap.c
index 6f7063e..57d0795 100644
--- a/tap.c
+++ b/tap.c
@@ -159,7 +159,7 @@ void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
ip4h->saddr = src.s_addr;
ip4h->daddr = dst.s_addr;
ip4h->check = csum_ip4_header(l3len, proto, src, dst);
- return ip4h + 1;
+ return (char *)ip4h + sizeof(*ip4h);
}
/**
diff --git a/tap.h b/tap.h
index a2cf9bc..9ac17ce 100644
--- a/tap.h
+++ b/tap.h
@@ -50,6 +50,8 @@ void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t dlen);
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
+ struct in_addr dst, size_t l4len, uint8_t proto);
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t dlen);
diff --git a/udp.c b/udp.c
index 923cc38..b72c3ce 100644
--- a/udp.c
+++ b/udp.c
@@ -87,6 +87,7 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
@@ -112,6 +113,9 @@
#include "udp_internal.h"
#include "udp_vu.h"
+/* Maximum UDP data to be returned in ICMP messages */
+#define ICMP4_MAX_DLEN 8
+
/* "Spliced" sockets indexed by bound port (host order) */
static int udp_splice_ns [IP_VERSIONS][NUM_PORTS];
static int udp_splice_init[IP_VERSIONS][NUM_PORTS];
@@ -403,24 +407,75 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
}
/**
+ * udp_send_conn_fail_icmp4() - Construct and send ICMPv4 to local peer
+ * @c: Execution context
+ * @ee: Extended error descriptor
+ * @toside: Destination side of flow
+ * @saddr: Address of ICMP generating node
+ * @in: First bytes (max 8) of original UDP message body
+ * @dlen: Length of the read part of original UDP message body
+ */
+static void udp_send_conn_fail_icmp4(const struct ctx *c,
+ const struct sock_extended_err *ee,
+ const struct flowside *toside,
+ struct in_addr saddr,
+ const void *in, size_t dlen)
+{
+ struct in_addr oaddr = toside->oaddr.v4mapped.a4;
+ struct in_addr eaddr = toside->eaddr.v4mapped.a4;
+ in_port_t eport = toside->eport;
+ in_port_t oport = toside->oport;
+ struct {
+ struct icmphdr icmp4h;
+ struct iphdr ip4h;
+ struct udphdr uh;
+ char data[ICMP4_MAX_DLEN];
+ } __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+ size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+ size_t l4len = dlen + sizeof(struct udphdr);
+
+ ASSERT(dlen <= ICMP4_MAX_DLEN);
+ memset(&msg, 0, sizeof(msg));
+ msg.icmp4h.type = ee->ee_type;
+ msg.icmp4h.code = ee->ee_code;
+ if (ee->ee_type == ICMP_DEST_UNREACH && ee->ee_code == ICMP_FRAG_NEEDED)
+ msg.icmp4h.un.frag.mtu = htons((uint16_t) ee->ee_info);
+
+ /* Reconstruct the original headers as returned in the ICMP message */
+ tap_push_ip4h(&msg.ip4h, eaddr, oaddr, l4len, IPPROTO_UDP);
+ tap_push_uh4(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+ memcpy(&msg.data, in, dlen);
+
+ tap_icmp4_send(c, saddr, eaddr, &msg, msglen);
+}
+
+/**
* udp_sock_recverr() - Receive and clear an error from a socket
- * @s: Socket to receive from
+ * @c: Execution context
+ * @ref: epoll reference
*
* Return: 1 if error received and processed, 0 if no more errors in queue, < 0
* if there was an error reading the queue
*
* #syscalls recvmsg
*/
-static int udp_sock_recverr(int s)
+static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
{
const struct sock_extended_err *ee;
const struct cmsghdr *hdr;
+ union sockaddr_inany saddr;
char buf[CMSG_SPACE(sizeof(*ee))];
+ char data[ICMP4_MAX_DLEN];
+ int s = ref.fd;
+ struct iovec iov = {
+ .iov_base = data,
+ .iov_len = sizeof(data)
+ };
struct msghdr mh = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = NULL,
- .msg_iovlen = 0,
+ .msg_name = &saddr,
+ .msg_namelen = sizeof(saddr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
.msg_control = buf,
.msg_controllen = sizeof(buf),
};
@@ -450,8 +505,15 @@ static int udp_sock_recverr(int s)
}
ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
+ if (ref.type == EPOLL_TYPE_UDP_REPLY) {
+ flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
+ const struct flowside *toside = flowside_at_sidx(sidx);
- /* TODO: When possible propagate and otherwise handle errors */
+ udp_send_conn_fail_icmp4(c, ee, toside, saddr.sa4.sin_addr,
+ data, rc);
+ } else {
+ trace("Ignoring received IP_RECVERR cmsg on listener socket");
+ }
debug("%s error on UDP socket %i: %s",
str_ee_origin(ee), s, strerror_(ee->ee_errno));
@@ -461,15 +523,16 @@ static int udp_sock_recverr(int s)
/**
* udp_sock_errs() - Process errors on a socket
* @c: Execution context
- * @s: Socket to receive from
+ * @ref: epoll reference
* @events: epoll events bitmap
*
* Return: Number of errors handled, or < 0 if we have an unrecoverable error
*/
-int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
+int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events)
{
unsigned n_err = 0;
socklen_t errlen;
+ int s = ref.fd;
int rc, err;
ASSERT(!c->no_udp);
@@ -478,7 +541,7 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
return 0; /* Nothing to do */
/* Empty the error queue */
- while ((rc = udp_sock_recverr(s)) > 0)
+ while ((rc = udp_sock_recverr(c, ref)) > 0)
n_err += rc;
if (rc < 0)
@@ -558,7 +621,7 @@ static void udp_buf_listen_sock_handler(const struct ctx *c,
const socklen_t sasize = sizeof(udp_meta[0].s_in);
int n, i;
- if (udp_sock_errs(c, ref.fd, events) < 0) {
+ if (udp_sock_errs(c, ref, events) < 0) {
err("UDP: Unrecoverable error on listening socket:"
" (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
/* FIXME: what now? close/re-open socket? */
@@ -661,7 +724,7 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
from_s = uflow->s[ref.flowside.sidei];
- if (udp_sock_errs(c, from_s, events) < 0) {
+ if (udp_sock_errs(c, ref, events) < 0) {
flow_err(uflow, "Unrecoverable error on reply socket");
flow_err_details(uflow);
udp_flow_close(c, uflow);
diff --git a/udp_internal.h b/udp_internal.h
index cc80e30..3b081f5 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -30,5 +30,5 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
const struct flowside *toside, size_t dlen,
bool no_udp_csum);
-int udp_sock_errs(const struct ctx *c, int s, uint32_t events);
+int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events);
#endif /* UDP_INTERNAL_H */
diff --git a/udp_vu.c b/udp_vu.c
index 4123510..c26a223 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -227,7 +227,7 @@ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
int i;
- if (udp_sock_errs(c, ref.fd, events) < 0) {
+ if (udp_sock_errs(c, ref, events) < 0) {
err("UDP: Unrecoverable error on listening socket:"
" (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
return;
@@ -302,7 +302,7 @@ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
ASSERT(!c->no_udp);
- if (udp_sock_errs(c, from_s, events) < 0) {
+ if (udp_sock_errs(c, ref, events) < 0) {
flow_err(uflow, "Unrecoverable error on reply socket");
flow_err_details(uflow);
udp_flow_close(c, uflow);