aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--flow.c32
-rw-r--r--flow.h4
-rw-r--r--flow_table.h15
-rw-r--r--udp.c169
-rw-r--r--udp_flow.h25
6 files changed, 242 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 09fc461..92cbd5a 100644
--- a/Makefile
+++ b/Makefile
@@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
- udp.h util.h
+ udp.h udp_flow.h util.h
HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/flow.c b/flow.c
index 27340df..4e337d4 100644
--- a/flow.c
+++ b/flow.c
@@ -37,6 +37,7 @@ const char *flow_type_str[] = {
[FLOW_TCP_SPLICE] = "TCP connection (spliced)",
[FLOW_PING4] = "ICMP ping sequence",
[FLOW_PING6] = "ICMPv6 ping sequence",
+ [FLOW_UDP] = "UDP flow",
};
static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
"flow_type_str[] doesn't match enum flow_type");
@@ -46,6 +47,7 @@ const uint8_t flow_proto[] = {
[FLOW_TCP_SPLICE] = IPPROTO_TCP,
[FLOW_PING4] = IPPROTO_ICMP,
[FLOW_PING6] = IPPROTO_ICMPV6,
+ [FLOW_UDP] = IPPROTO_UDP,
};
static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
"flow_proto[] doesn't match enum flow_type");
@@ -702,6 +704,32 @@ flow_sidx_t flow_lookup_af(const struct ctx *c,
}
/**
+ * flow_lookup_sa() - Look up a flow given an endpoint socket address
+ * @c: Execution context
+ * @proto: Protocol of the flow (IP L4 protocol number)
+ * @pif: Interface of the flow
+ * @esa: Socket address of the endpoint
+ * @fport: Forwarding port number
+ *
+ * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
+ */
+flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
+ const void *esa, in_port_t fport)
+{
+ struct flowside side = {
+ .fport = fport,
+ };
+
+ inany_from_sockaddr(&side.eaddr, &side.eport, esa);
+ if (inany_v4(&side.eaddr))
+ side.faddr = inany_any4;
+ else
+ side.faddr = inany_any6;
+
+ return flowside_lookup(c, proto, pif, &side);
+}
+
+/**
* flow_defer_handler() - Handler for per-flow deferred and timed tasks
* @c: Execution context
* @now: Current timestamp
@@ -780,6 +808,10 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
if (timer)
closed = icmp_ping_timer(c, &flow->ping, now);
break;
+ case FLOW_UDP:
+ if (timer)
+ closed = udp_flow_timer(c, &flow->udp, now);
+ break;
default:
/* Assume other flow types don't need any handling */
;
diff --git a/flow.h b/flow.h
index bf6b845..7866477 100644
--- a/flow.h
+++ b/flow.h
@@ -115,6 +115,8 @@ enum flow_type {
FLOW_PING4,
/* ICMPv6 echo requests from guest to host and matching replies back */
FLOW_PING6,
+ /* UDP pseudo-connection */
+ FLOW_UDP,
FLOW_NUM_TYPES,
};
@@ -238,6 +240,8 @@ flow_sidx_t flow_lookup_af(const struct ctx *c,
uint8_t proto, uint8_t pif, sa_family_t af,
const void *eaddr, const void *faddr,
in_port_t eport, in_port_t fport);
+flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
+ const void *esa, in_port_t fport);
union flow;
diff --git a/flow_table.h b/flow_table.h
index 9d912c8..df253be 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -9,6 +9,7 @@
#include "tcp_conn.h"
#include "icmp_flow.h"
+#include "udp_flow.h"
/**
* struct flow_free_cluster - Information about a cluster of free entries
@@ -35,6 +36,7 @@ union flow {
struct tcp_tap_conn tcp;
struct tcp_splice_conn tcp_splice;
struct icmp_ping_flow ping;
+ struct udp_flow udp;
};
/* Global Flow Table */
@@ -98,6 +100,19 @@ static inline uint8_t pif_at_sidx(flow_sidx_t sidx)
return flow->f.pif[sidx.sidei];
}
+/** flow_sidx_opposite() - Get the other side of the same flow
+ * @sidx: Flow & side index
+ *
+ * Return: sidx for the other side of the same flow as @sidx
+ */
+static inline flow_sidx_t flow_sidx_opposite(flow_sidx_t sidx)
+{
+ if (!flow_sidx_valid(sidx))
+ return FLOW_SIDX_NONE;
+
+ return (flow_sidx_t){.flowi = sidx.flowi, .sidei = !sidx.sidei};
+}
+
/** flow_sidx() - Index of one side of a flow from common structure
* @f: Common flow fields pointer
* @sidei: Which side to refer to (0 or 1)
diff --git a/udp.c b/udp.c
index 150f970..fdbe396 100644
--- a/udp.c
+++ b/udp.c
@@ -15,6 +15,30 @@
/**
* DOC: Theory of Operation
*
+ * UDP Flows
+ * =========
+ *
+ * UDP doesn't have true connections, but many protocols use a connection-like
+ * format. The flow is initiated by a client sending a datagram from a port of
+ * its choosing (usually ephemeral) to a specific port (usually well known) on a
+ * server. Both client and server address must be unicast. The server sends
+ * replies using the same addresses & ports with src/dest swapped.
+ *
+ * We track pseudo-connections of this type as flow table entries of type
+ * FLOW_UDP. We store the time of the last traffic on the flow in uflow->ts,
+ * and let the flow expire if there is no traffic for UDP_CONN_TIMEOUT seconds.
+ *
+ * NOTE: This won't handle multicast protocols, or some protocols with different
+ * port usage. We'll need specific logic if we want to handle those.
+ *
+ * "Listening" sockets
+ * ===================
+ *
+ * UDP doesn't use listen(), but we consider long term sockets which are allowed
+ * to create new flows "listening" by analogy with TCP.
+ *
+ * Port tracking
+ * =============
*
* For UDP, a reduced version of port-based connection tracking is implemented
* with two purposes:
@@ -122,6 +146,7 @@
#include "tap.h"
#include "pcap.h"
#include "log.h"
+#include "flow_table.h"
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
@@ -200,6 +225,7 @@ static struct ethhdr udp6_eth_hdr;
* @taph: Tap backend specific header
* @s_in: Source socket address, filled in by recvmmsg()
* @splicesrc: Source port for splicing, or -1 if not spliceable
+ * @tosidx: sidx for the destination side of this datagram's flow
*/
static struct udp_meta_t {
struct ipv6hdr ip6h;
@@ -208,6 +234,7 @@ static struct udp_meta_t {
union sockaddr_inany s_in;
int splicesrc;
+ flow_sidx_t tosidx;
}
#ifdef __AVX2__
__attribute__ ((aligned(32)))
@@ -492,6 +519,115 @@ static int udp_mmh_splice_port(union epoll_ref ref, const struct mmsghdr *mmh)
}
/**
+ * udp_at_sidx() - Get UDP specific flow at given sidx
+ * @sidx: Flow and side to retrieve
+ *
+ * Return: UDP specific flow at @sidx, or NULL of @sidx is invalid. Asserts if
+ * the flow at @sidx is not FLOW_UDP.
+ */
+struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
+{
+ union flow *flow = flow_at_sidx(sidx);
+
+ if (!flow)
+ return NULL;
+
+ ASSERT(flow->f.type == FLOW_UDP);
+ return &flow->udp;
+}
+
+/*
+ * udp_flow_close() - Close and clean up UDP flow
+ * @c: Execution context
+ * @uflow: UDP flow
+ */
+static void udp_flow_close(const struct ctx *c, const struct udp_flow *uflow)
+{
+ flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
+}
+
+/**
+ * udp_flow_new() - Common setup for a new UDP flow
+ * @c: Execution context
+ * @flow: Initiated flow
+ * @now: Timestamp
+ *
+ * Return: UDP specific flow, if successful, NULL on failure
+ */
+static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
+ const struct timespec *now)
+{
+ const struct flowside *ini = &flow->f.side[INISIDE];
+ struct udp_flow *uflow = NULL;
+
+ if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) {
+ flow_trace(flow, "Invalid endpoint to initiate UDP flow");
+ goto cancel;
+ }
+
+ if (!flow_target(c, flow, IPPROTO_UDP))
+ goto cancel;
+
+ uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
+ uflow->ts = now->tv_sec;
+
+ flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE));
+ FLOW_ACTIVATE(uflow);
+
+ return FLOW_SIDX(uflow, TGTSIDE);
+
+cancel:
+ if (uflow)
+ udp_flow_close(c, uflow);
+ flow_alloc_cancel(flow);
+ return FLOW_SIDX_NONE;
+
+}
+
+/**
+ * udp_flow_from_sock() - Find or create UDP flow for "listening" socket
+ * @c: Execution context
+ * @ref: epoll reference of the receiving socket
+ * @meta: Metadata buffer for the datagram
+ * @now: Timestamp
+ *
+ * Return: sidx for the destination side of the flow for this packet, or
+ * FLOW_SIDX_NONE if we couldn't find or create a flow.
+ */
+static flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
+ struct udp_meta_t *meta,
+ const struct timespec *now)
+{
+ struct udp_flow *uflow;
+ union flow *flow;
+ flow_sidx_t sidx;
+
+ ASSERT(ref.type == EPOLL_TYPE_UDP);
+
+ /* FIXME: Match reply packets to their flow as well */
+ if (!ref.udp.orig)
+ return FLOW_SIDX_NONE;
+
+ sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, &meta->s_in, ref.udp.port);
+ if ((uflow = udp_at_sidx(sidx))) {
+ uflow->ts = now->tv_sec;
+ return flow_sidx_opposite(sidx);
+ }
+
+ if (!(flow = flow_alloc())) {
+ char sastr[SOCKADDR_STRLEN];
+
+ debug("Couldn't allocate flow for UDP datagram from %s %s",
+ pif_name(ref.udp.pif),
+ sockaddr_ntop(&meta->s_in, sastr, sizeof(sastr)));
+ return FLOW_SIDX_NONE;
+ }
+
+ flow_initiate_sa(flow, ref.udp.pif, &meta->s_in, ref.udp.port);
+ return udp_flow_new(c, flow, now);
+}
+
+/**
* udp_splice_prepare() - Prepare one datagram for splicing
* @mmh: Receiving mmsghdr array
* @idx: Index of the datagram to prepare
@@ -848,12 +984,15 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
dstport += c->udp.fwd_in.f.delta[dstport];
/* We divide datagrams into batches based on how we need to send them,
- * determined by udp_meta[i].splicesrc. To avoid either two passes
- * through the array, or recalculating splicesrc for a single entry, we
- * have to populate it one entry *ahead* of the loop counter.
+ * determined by udp_meta[i].splicesrc and udp_meta[i].tosidx. To avoid
+ * either two passes through the array, or recalculating splicesrc and
+ * tosidxfor a single entry, we have to populate it one entry *ahead* of
+ * the loop counter.
*/
udp_meta[0].splicesrc = udp_mmh_splice_port(ref, mmh_recv);
+ udp_meta[0].tosidx = udp_flow_from_sock(c, ref, &udp_meta[0], now);
for (i = 0; i < n; ) {
+ flow_sidx_t batchsidx = udp_meta[i].tosidx;
int batchsrc = udp_meta[i].splicesrc;
int batchstart = i;
@@ -870,7 +1009,11 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
udp_meta[i].splicesrc = udp_mmh_splice_port(ref,
&mmh_recv[i]);
- } while (udp_meta[i].splicesrc == batchsrc);
+ udp_meta[i].tosidx = udp_flow_from_sock(c, ref,
+ &udp_meta[i],
+ now);
+ } while (flow_sidx_eq(udp_meta[i].tosidx, batchsidx) &&
+ udp_meta[i].splicesrc == batchsrc);
if (batchsrc >= 0) {
udp_splice_send(c, batchstart, i - batchstart,
@@ -1269,6 +1412,24 @@ static int udp_port_rebind_outbound(void *arg)
}
/**
+ * udp_flow_timer() - Handler for timed events related to a given flow
+ * @c: Execution context
+ * @uflow: UDP flow
+ * @now: Current timestamp
+ *
+ * Return: true if the flow is ready to free, false otherwise
+ */
+bool udp_flow_timer(const struct ctx *c, const struct udp_flow *uflow,
+ const struct timespec *now)
+{
+ if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT)
+ return false;
+
+ udp_flow_close(c, uflow);
+ return true;
+}
+
+/**
* udp_timer() - Scan activity bitmaps for ports with associated timed events
* @c: Execution context
* @now: Current timestamp
diff --git a/udp_flow.h b/udp_flow.h
new file mode 100644
index 0000000..18af9ac
--- /dev/null
+++ b/udp_flow.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * UDP flow tracking data structures
+ */
+#ifndef UDP_FLOW_H
+#define UDP_FLOW_H
+
+/**
+ * struct udp - Descriptor for a flow of UDP packets
+ * @f: Generic flow information
+ * @ts: Activity timestamp
+ */
+struct udp_flow {
+ /* Must be first element */
+ struct flow_common f;
+
+ time_t ts;
+};
+
+bool udp_flow_timer(const struct ctx *c, const struct udp_flow *uflow,
+ const struct timespec *now);
+
+#endif /* UDP_FLOW_H */