aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorLaurent Vivier <lvivier@redhat.com>2024-08-02 18:10:36 +0200
committerStefano Brivio <sbrivio@redhat.com>2024-08-05 17:38:17 +0200
commite877f905e5fc5900c2c8dd9378e39705b21aec82 (patch)
treefcc5d4f184448e3c703b9b69f36c7f2b5b5d1990
parent623ceb1f2b9051e3c6b34c99463a22a558b74674 (diff)
downloadpasst-e877f905e5fc5900c2c8dd9378e39705b21aec82.tar
passt-e877f905e5fc5900c2c8dd9378e39705b21aec82.tar.gz
passt-e877f905e5fc5900c2c8dd9378e39705b21aec82.tar.bz2
passt-e877f905e5fc5900c2c8dd9378e39705b21aec82.tar.lz
passt-e877f905e5fc5900c2c8dd9378e39705b21aec82.tar.xz
passt-e877f905e5fc5900c2c8dd9378e39705b21aec82.tar.zst
passt-e877f905e5fc5900c2c8dd9378e39705b21aec82.zip
udp_flow: move all udp_flow functions to udp_flow.c
No code change. They need to be exported to be available by the vhost-user version of passt. Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--Makefile2
-rw-r--r--udp.c260
-rw-r--r--udp_flow.c274
-rw-r--r--udp_flow.h9
4 files changed, 284 insertions, 261 deletions
diff --git a/Makefile b/Makefile
index bd504d2..b6329e3 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
- tcp_buf.c tcp_splice.c udp.c util.c
+ tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c
QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
diff --git a/udp.c b/udp.c
index f27a00b..7731257 100644
--- a/udp.c
+++ b/udp.c
@@ -95,7 +95,6 @@
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>
-#include <fcntl.h>
#include <arpa/inet.h>
#include <linux/errqueue.h>
@@ -111,7 +110,6 @@
#include "log.h"
#include "flow_table.h"
-#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
/* "Spliced" sockets indexed by bound port (host order) */
@@ -277,199 +275,6 @@ static void udp_iov_init(const struct ctx *c)
}
/**
- * udp_at_sidx() - Get UDP specific flow at given sidx
- * @sidx: Flow and side to retrieve
- *
- * Return: UDP specific flow at @sidx, or NULL of @sidx is invalid. Asserts if
- * the flow at @sidx is not FLOW_UDP.
- */
-struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
-{
- union flow *flow = flow_at_sidx(sidx);
-
- if (!flow)
- return NULL;
-
- ASSERT(flow->f.type == FLOW_UDP);
- return &flow->udp;
-}
-
-/*
- * udp_flow_close() - Close and clean up UDP flow
- * @c: Execution context
- * @uflow: UDP flow
- */
-static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
-{
- if (uflow->s[INISIDE] >= 0) {
- /* The listening socket needs to stay in epoll */
- close(uflow->s[INISIDE]);
- uflow->s[INISIDE] = -1;
- }
-
- if (uflow->s[TGTSIDE] >= 0) {
- /* But the flow specific one needs to be removed */
- epoll_ctl(c->epollfd, EPOLL_CTL_DEL, uflow->s[TGTSIDE], NULL);
- close(uflow->s[TGTSIDE]);
- uflow->s[TGTSIDE] = -1;
- }
- flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
- if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
- flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
-}
-
-/**
- * udp_flow_new() - Common setup for a new UDP flow
- * @c: Execution context
- * @flow: Initiated flow
- * @s_ini: Initiating socket (or -1)
- * @now: Timestamp
- *
- * Return: UDP specific flow, if successful, NULL on failure
- */
-static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
- int s_ini, const struct timespec *now)
-{
- const struct flowside *ini = &flow->f.side[INISIDE];
- struct udp_flow *uflow = NULL;
- const struct flowside *tgt;
- uint8_t tgtpif;
-
- if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) {
- flow_trace(flow, "Invalid endpoint to initiate UDP flow");
- goto cancel;
- }
-
- if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
- goto cancel;
- tgtpif = flow->f.pif[TGTSIDE];
-
- uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
- uflow->ts = now->tv_sec;
- uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
-
- if (s_ini >= 0) {
- /* When using auto port-scanning the listening port could go
- * away, so we need to duplicate the socket
- */
- uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
- if (uflow->s[INISIDE] < 0) {
- flow_err(uflow,
- "Couldn't duplicate listening socket: %s",
- strerror(errno));
- goto cancel;
- }
- }
-
- if (pif_is_socket(tgtpif)) {
- struct mmsghdr discard[UIO_MAXIOV] = { 0 };
- union {
- flow_sidx_t sidx;
- uint32_t data;
- } fref = {
- .sidx = FLOW_SIDX(flow, TGTSIDE),
- };
- int rc;
-
- uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY,
- tgtpif, tgt, fref.data);
- if (uflow->s[TGTSIDE] < 0) {
- flow_dbg(uflow,
- "Couldn't open socket for spliced flow: %s",
- strerror(errno));
- goto cancel;
- }
-
- if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) {
- flow_dbg(uflow,
- "Couldn't connect flow socket: %s",
- strerror(errno));
- goto cancel;
- }
-
- /* It's possible, if unlikely, that we could receive some
- * unrelated packets in between the bind() and connect() of this
- * socket. For now we just discard these. We could consider
- * trying to redirect these to an appropriate handler, if we
- * need to.
- */
- rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard),
- MSG_DONTWAIT, NULL);
- if (rc >= ARRAY_SIZE(discard)) {
- flow_dbg(uflow,
- "Too many (%d) spurious reply datagrams", rc);
- goto cancel;
- } else if (rc > 0) {
- flow_trace(uflow,
- "Discarded %d spurious reply datagrams", rc);
- } else if (errno != EAGAIN) {
- flow_err(uflow,
- "Unexpected error discarding datagrams: %s",
- strerror(errno));
- }
- }
-
- flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE));
-
- /* If the target side is a socket, it will be a reply socket that knows
- * its own flowside. But if it's tap, then we need to look it up by
- * hash.
- */
- if (!pif_is_socket(tgtpif))
- flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE));
- FLOW_ACTIVATE(uflow);
-
- return FLOW_SIDX(uflow, TGTSIDE);
-
-cancel:
- if (uflow)
- udp_flow_close(c, uflow);
- flow_alloc_cancel(flow);
- return FLOW_SIDX_NONE;
-}
-
-/**
- * udp_flow_from_sock() - Find or create UDP flow for "listening" socket
- * @c: Execution context
- * @ref: epoll reference of the receiving socket
- * @s_in: Source socket address, filled in by recvmmsg()
- * @now: Timestamp
- *
- * #syscalls fcntl
- *
- * Return: sidx for the destination side of the flow for this packet, or
- * FLOW_SIDX_NONE if we couldn't find or create a flow.
- */
-static flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
- const union sockaddr_inany *s_in,
- const struct timespec *now)
-{
- struct udp_flow *uflow;
- union flow *flow;
- flow_sidx_t sidx;
-
- ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN);
-
- sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port);
- if ((uflow = udp_at_sidx(sidx))) {
- uflow->ts = now->tv_sec;
- return flow_sidx_opposite(sidx);
- }
-
- if (!(flow = flow_alloc())) {
- char sastr[SOCKADDR_STRLEN];
-
- debug("Couldn't allocate flow for UDP datagram from %s %s",
- pif_name(ref.udp.pif),
- sockaddr_ntop(s_in, sastr, sizeof(sastr)));
- return FLOW_SIDX_NONE;
- }
-
- flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port);
- return udp_flow_new(c, flow, ref.fd, now);
-}
-
-/**
* udp_splice_prepare() - Prepare one datagram for splicing
* @mmh: Receiving mmsghdr array
* @idx: Index of the datagram to prepare
@@ -805,53 +610,6 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
}
/**
- * udp_flow_from_tap() - Find or create UDP flow for tap packets
- * @c: Execution context
- * @pif: pif on which the packet is arriving
- * @af: Address family, AF_INET or AF_INET6
- * @saddr: Source address on guest side
- * @daddr: Destination address guest side
- * @srcport: Source port on guest side
- * @dstport: Destination port on guest side
- *
- * Return: sidx for the destination side of the flow for this packet, or
- * FLOW_SIDX_NONE if we couldn't find or create a flow.
- */
-static flow_sidx_t udp_flow_from_tap(const struct ctx *c,
- uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr,
- in_port_t srcport, in_port_t dstport,
- const struct timespec *now)
-{
- struct udp_flow *uflow;
- union flow *flow;
- flow_sidx_t sidx;
-
- ASSERT(pif == PIF_TAP);
-
- sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
- srcport, dstport);
- if ((uflow = udp_at_sidx(sidx))) {
- uflow->ts = now->tv_sec;
- return flow_sidx_opposite(sidx);
- }
-
- if (!(flow = flow_alloc())) {
- char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
-
- debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu",
- pif_name(pif),
- inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport,
- inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport);
- return FLOW_SIDX_NONE;
- }
-
- flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport);
-
- return udp_flow_new(c, flow, -1, now);
-}
-
-/**
* udp_tap_handler() - Handle packets from tap
* @c: Execution context
* @pif: pif on which the packet is arriving
@@ -1099,24 +857,6 @@ static int udp_port_rebind_outbound(void *arg)
}
/**
- * udp_flow_timer() - Handler for timed events related to a given flow
- * @c: Execution context
- * @uflow: UDP flow
- * @now: Current timestamp
- *
- * Return: true if the flow is ready to free, false otherwise
- */
-bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
- const struct timespec *now)
-{
- if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT)
- return false;
-
- udp_flow_close(c, uflow);
- return true;
-}
-
-/**
* udp_timer() - Scan activity bitmaps for ports with associated timed events
* @c: Execution context
* @now: Current timestamp
diff --git a/udp_flow.c b/udp_flow.c
new file mode 100644
index 0000000..8b25ad1
--- /dev/null
+++ b/udp_flow.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * UDP flow tracking functions
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+
+#include "util.h"
+#include "passt.h"
+#include "flow_table.h"
+
+#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
+
+/**
+ * udp_at_sidx() - Get UDP specific flow at given sidx
+ * @sidx: Flow and side to retrieve
+ *
+ * Return: UDP specific flow at @sidx, or NULL if @sidx is invalid. Asserts if
+ * the flow at @sidx is not FLOW_UDP.
+ */
+struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
+{
+ union flow *flow = flow_at_sidx(sidx);
+
+ if (!flow)
+ return NULL;
+
+ ASSERT(flow->f.type == FLOW_UDP);
+ return &flow->udp;
+}
+
+/**
+ * udp_flow_close() - Close and clean up UDP flow
+ * @c: Execution context
+ * @uflow: UDP flow
+ */
+static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
+{
+ if (uflow->s[INISIDE] >= 0) {
+ /* The listening socket needs to stay in epoll */
+ close(uflow->s[INISIDE]);
+ uflow->s[INISIDE] = -1;
+ }
+
+ if (uflow->s[TGTSIDE] >= 0) {
+ /* But the flow specific one needs to be removed */
+ epoll_ctl(c->epollfd, EPOLL_CTL_DEL, uflow->s[TGTSIDE], NULL);
+ close(uflow->s[TGTSIDE]);
+ uflow->s[TGTSIDE] = -1;
+ }
+ flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
+ if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
+ flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
+}
+
+/**
+ * udp_flow_new() - Common setup for a new UDP flow
+ * @c: Execution context
+ * @flow: Initiated flow
+ * @s_ini: Initiating socket (or -1)
+ * @now: Timestamp
+ *
+ * Return: sidx of the new flow's target side, FLOW_SIDX_NONE on failure
+ */
+static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
+ int s_ini, const struct timespec *now)
+{
+ const struct flowside *ini = &flow->f.side[INISIDE];
+ struct udp_flow *uflow = NULL;
+ const struct flowside *tgt;
+ uint8_t tgtpif;
+
+ if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) {
+ flow_trace(flow, "Invalid endpoint to initiate UDP flow");
+ goto cancel;
+ }
+
+ if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
+ goto cancel;
+ tgtpif = flow->f.pif[TGTSIDE];
+
+ uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
+ uflow->ts = now->tv_sec;
+ uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
+
+ if (s_ini >= 0) {
+ /* When using auto port-scanning the listening port could go
+ * away, so we need to duplicate the socket
+ */
+ uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
+ if (uflow->s[INISIDE] < 0) {
+ flow_err(uflow,
+ "Couldn't duplicate listening socket: %s",
+ strerror(errno));
+ goto cancel;
+ }
+ }
+
+ if (pif_is_socket(tgtpif)) {
+ struct mmsghdr discard[UIO_MAXIOV] = { 0 };
+ union {
+ flow_sidx_t sidx;
+ uint32_t data;
+ } fref = {
+ .sidx = FLOW_SIDX(flow, TGTSIDE),
+ };
+ int rc;
+
+ uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY,
+ tgtpif, tgt, fref.data);
+ if (uflow->s[TGTSIDE] < 0) {
+ flow_dbg(uflow,
+ "Couldn't open socket for spliced flow: %s",
+ strerror(errno));
+ goto cancel;
+ }
+
+ if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) {
+ flow_dbg(uflow,
+ "Couldn't connect flow socket: %s",
+ strerror(errno));
+ goto cancel;
+ }
+
+ /* It's possible, if unlikely, that we could receive some
+ * unrelated packets in between the bind() and connect() of this
+ * socket. For now we just discard these. We could consider
+ * trying to redirect these to an appropriate handler, if we
+ * need to.
+ */
+ rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard),
+ MSG_DONTWAIT, NULL);
+ if (rc >= ARRAY_SIZE(discard)) {
+ flow_dbg(uflow,
+ "Too many (%d) spurious reply datagrams", rc);
+ goto cancel;
+ } else if (rc > 0) {
+ flow_trace(uflow,
+ "Discarded %d spurious reply datagrams", rc);
+ } else if (errno != EAGAIN) {
+ flow_err(uflow,
+ "Unexpected error discarding datagrams: %s",
+ strerror(errno));
+ }
+ }
+
+ flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE));
+
+ /* If the target side is a socket, it will be a reply socket that knows
+ * its own flowside. But if it's tap, then we need to look it up by
+ * hash.
+ */
+ if (!pif_is_socket(tgtpif))
+ flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE));
+ FLOW_ACTIVATE(uflow);
+
+ return FLOW_SIDX(uflow, TGTSIDE);
+
+cancel:
+ if (uflow)
+ udp_flow_close(c, uflow);
+ flow_alloc_cancel(flow);
+ return FLOW_SIDX_NONE;
+}
+
+/**
+ * udp_flow_from_sock() - Find or create UDP flow for "listening" socket
+ * @c: Execution context
+ * @ref: epoll reference of the receiving socket
+ * @s_in: Source socket address, filled in by recvmmsg()
+ * @now: Timestamp
+ *
+ * #syscalls fcntl
+ *
+ * Return: sidx for the destination side of the flow for this packet, or
+ * FLOW_SIDX_NONE if we couldn't find or create a flow.
+ */
+flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
+ const union sockaddr_inany *s_in,
+ const struct timespec *now)
+{
+ struct udp_flow *uflow;
+ union flow *flow;
+ flow_sidx_t sidx;
+
+ ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN);
+
+ sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port);
+ if ((uflow = udp_at_sidx(sidx))) {
+ uflow->ts = now->tv_sec;
+ return flow_sidx_opposite(sidx);
+ }
+
+ if (!(flow = flow_alloc())) {
+ char sastr[SOCKADDR_STRLEN];
+
+ debug("Couldn't allocate flow for UDP datagram from %s %s",
+ pif_name(ref.udp.pif),
+ sockaddr_ntop(s_in, sastr, sizeof(sastr)));
+ return FLOW_SIDX_NONE;
+ }
+
+ flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port);
+ return udp_flow_new(c, flow, ref.fd, now);
+}
+
+/**
+ * udp_flow_from_tap() - Find or create UDP flow for tap packets
+ * @c: Execution context
+ * @pif: pif on which the packet is arriving
+ * @af: Address family, AF_INET or AF_INET6
+ * @saddr: Source address on guest side
+ * @daddr: Destination address on guest side
+ * @srcport: Source port on guest side
+ * @dstport: Destination port on guest side
+ *
+ * Return: sidx for the destination side of the flow for this packet, or
+ * FLOW_SIDX_NONE if we couldn't find or create a flow.
+ */
+flow_sidx_t udp_flow_from_tap(const struct ctx *c,
+ uint8_t pif, sa_family_t af,
+ const void *saddr, const void *daddr,
+ in_port_t srcport, in_port_t dstport,
+ const struct timespec *now)
+{
+ struct udp_flow *uflow;
+ union flow *flow;
+ flow_sidx_t sidx;
+
+ ASSERT(pif == PIF_TAP);
+
+ sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
+ srcport, dstport);
+ if ((uflow = udp_at_sidx(sidx))) {
+ uflow->ts = now->tv_sec;
+ return flow_sidx_opposite(sidx);
+ }
+
+ if (!(flow = flow_alloc())) {
+ char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
+
+ debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu",
+ pif_name(pif),
+ inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport,
+ inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport);
+ return FLOW_SIDX_NONE;
+ }
+
+ flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport);
+
+ return udp_flow_new(c, flow, -1, now);
+}
+
+/**
+ * udp_flow_timer() - Handler for timed events related to a given flow
+ * @c: Execution context
+ * @uflow: UDP flow
+ * @now: Current timestamp
+ *
+ * Return: true if the flow is ready to free, false otherwise
+ */
+bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
+ const struct timespec *now)
+{
+ if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT)
+ return false;
+
+ udp_flow_close(c, uflow);
+ return true;
+}
diff --git a/udp_flow.h b/udp_flow.h
index e0736f8..12ddf03 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -21,6 +21,15 @@ struct udp_flow {
int s[SIDES];
};
+struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
+flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
+ const union sockaddr_inany *s_in,
+ const struct timespec *now);
+flow_sidx_t udp_flow_from_tap(const struct ctx *c,
+ uint8_t pif, sa_family_t af,
+ const void *saddr, const void *daddr,
+ in_port_t srcport, in_port_t dstport,
+ const struct timespec *now);
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
const struct timespec *now);