diff options
Diffstat (limited to 'udp_flow.c')
-rw-r--r-- | udp_flow.c | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/udp_flow.c b/udp_flow.c new file mode 100644 index 0000000..cef3fb5 --- /dev/null +++ b/udp_flow.c @@ -0,0 +1,362 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson <david@gibson.dropbear.id.au> + * + * UDP flow tracking functions + */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/uio.h> +#include <unistd.h> +#include <netinet/udp.h> + +#include "util.h" +#include "passt.h" +#include "flow_table.h" +#include "udp_internal.h" + +#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ + +/** + * udp_at_sidx() - Get UDP specific flow at given sidx + * @sidx: Flow and side to retrieve + * + * Return: UDP specific flow at @sidx, or NULL of @sidx is invalid. Asserts if + * the flow at @sidx is not FLOW_UDP. + */ +struct udp_flow *udp_at_sidx(flow_sidx_t sidx) +{ + union flow *flow = flow_at_sidx(sidx); + + if (!flow) + return NULL; + + ASSERT(flow->f.type == FLOW_UDP); + return &flow->udp; +} + +/** + * udp_flow_close() - Close and clean up UDP flow + * @c: Execution context + * @uflow: UDP flow + */ +void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) +{ + unsigned sidei; + + if (uflow->closed) + return; /* Nothing to do */ + + flow_foreach_sidei(sidei) { + flow_hash_remove(c, FLOW_SIDX(uflow, sidei)); + if (uflow->s[sidei] >= 0) { + epoll_del(c, uflow->s[sidei]); + close(uflow->s[sidei]); + uflow->s[sidei] = -1; + } + } + + uflow->closed = true; +} + +/** + * udp_flow_sock() - Create, bind and connect a flow specific UDP socket + * @c: Execution context + * @uflow: UDP flow to open socket for + * @sidei: Side of @uflow to open socket for + * + * Return: fd of new socket on success, -ve error code on failure + */ +static int udp_flow_sock(const struct ctx *c, + struct udp_flow *uflow, unsigned sidei) +{ + const struct flowside *side = &uflow->f.side[sidei]; + uint8_t pif = uflow->f.pif[sidei]; + union { + flow_sidx_t sidx; + uint32_t data; + } fref = { .sidx = FLOW_SIDX(uflow, sidei) }; + int s; + + s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data); + if (s < 0) { + flow_dbg_perror(uflow, "Couldn't open flow specific socket"); + return s; + } + + if (flowside_connect(c, s, pif, side) < 0) { + int rc = -errno; + + epoll_del(c, s); + close(s); + + flow_dbg_perror(uflow, "Couldn't connect flow socket"); + return rc; + } + + /* It's possible, if unlikely, that we could receive some packets in + * between the bind() and connect() which may or may not be for this + * flow. Being UDP we could just discard them, but it's not ideal. + * + * There's also a tricky case if a bunch of datagrams for a new flow + * arrive in rapid succession, the first going to the original listening + * socket and later ones going to this new socket. If we forwarded the + * datagrams from the new socket immediately here they would go before + * the datagram which established the flow. Again, not strictly wrong + * for UDP, but not ideal. + * + * So, we flag that the new socket is in a transient state where it + * might have datagrams for a different flow queued. Before the next + * epoll cycle, udp_flow_defer() will flush out any such datagrams, and + * thereafter everything on the new socket should be strictly for this + * flow. + */ + if (sidei) + uflow->flush1 = true; + else + uflow->flush0 = true; + + return s; +} + +/** + * udp_flow_new() - Common setup for a new UDP flow + * @c: Execution context + * @flow: Initiated flow + * @now: Timestamp + * + * Return: sidx for the target side of the new UDP flow, or FLOW_SIDX_NONE + * on failure. + * + * #syscalls getsockname + */ +static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, + const struct timespec *now) +{ + struct udp_flow *uflow = NULL; + const struct flowside *tgt; + unsigned sidei; + + if (!(tgt = flow_target(c, flow, IPPROTO_UDP))) + goto cancel; + + uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp); + uflow->ts = now->tv_sec; + uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1; + uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0; + + flow_foreach_sidei(sidei) { + if (pif_is_socket(uflow->f.pif[sidei])) + if ((uflow->s[sidei] = udp_flow_sock(c, uflow, sidei)) < 0) + goto cancel; + } + + if (uflow->s[TGTSIDE] >= 0 && inany_is_unspecified(&tgt->oaddr)) { + /* When we target a socket, we connect() it, but might not + * always bind(), leaving the kernel to pick our address. In + * that case connect() will implicitly bind() the socket, but we + * need to determine its local address so that we can match + * reply packets back to the correct flow. Update the flow with + * the information from getsockname() */ + union sockaddr_inany sa; + socklen_t sl = sizeof(sa); + in_port_t port; + + if (getsockname(uflow->s[TGTSIDE], &sa.sa, &sl) < 0 || + inany_from_sockaddr(&uflow->f.side[TGTSIDE].oaddr, + &port, &sa) < 0) { + flow_perror(uflow, "Unable to determine local address"); + goto cancel; + } + if (port != tgt->oport) { + flow_err(uflow, "Unexpected local port"); + goto cancel; + } + } + + /* Tap sides always need to be looked up by hash. Socket sides don't + * always, but sometimes do (receiving packets on a socket not specific + * to one flow). Unconditionally hash both sides so all our bases are + * covered + */ + flow_foreach_sidei(sidei) + flow_hash_insert(c, FLOW_SIDX(uflow, sidei)); + + FLOW_ACTIVATE(uflow); + + return FLOW_SIDX(uflow, TGTSIDE); + +cancel: + if (uflow) + udp_flow_close(c, uflow); + flow_alloc_cancel(flow); + return FLOW_SIDX_NONE; +} + +/** + * udp_flow_from_sock() - Find or create UDP flow for incoming datagram + * @c: Execution context + * @pif: Interface the datagram is arriving from + * @dst: Our (local) address to which the datagram is arriving + * @port: Our (local) port number to which the datagram is arriving + * @s_in: Source socket address, filled in by recvmmsg() + * @now: Timestamp + * + * #syscalls fcntl arm:fcntl64 ppc64:fcntl64|fcntl i686:fcntl64 + * + * Return: sidx for the destination side of the flow for this packet, or + * FLOW_SIDX_NONE if we couldn't find or create a flow. + */ +flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, + const union inany_addr *dst, in_port_t port, + const union sockaddr_inany *s_in, + const struct timespec *now) +{ + const struct flowside *ini; + struct udp_flow *uflow; + union flow *flow; + flow_sidx_t sidx; + + sidx = flow_lookup_sa(c, IPPROTO_UDP, pif, s_in, dst, port); + if ((uflow = udp_at_sidx(sidx))) { + uflow->ts = now->tv_sec; + return flow_sidx_opposite(sidx); + } + + if (!(flow = flow_alloc())) { + char sastr[SOCKADDR_STRLEN]; + + debug("Couldn't allocate flow for UDP datagram from %s %s", + pif_name(pif), sockaddr_ntop(s_in, sastr, sizeof(sastr))); + return FLOW_SIDX_NONE; + } + + ini = flow_initiate_sa(flow, pif, s_in, dst, port); + + if (!inany_is_unicast(&ini->eaddr) || + ini->eport == 0 || ini->oport == 0) { + /* In principle ini->oddr also must be specified, but when we've + * been initiated from a socket bound to 0.0.0.0 or ::, we don't + * know our address, so we have to leave it unpopulated. + */ + flow_err(flow, "Invalid endpoint on UDP recvfrom()"); + flow_alloc_cancel(flow); + return FLOW_SIDX_NONE; + } + + return udp_flow_new(c, flow, now); +} + +/** + * udp_flow_from_tap() - Find or create UDP flow for tap packets + * @c: Execution context + * @pif: pif on which the packet is arriving + * @af: Address family, AF_INET or AF_INET6 + * @saddr: Source address on guest side + * @daddr: Destination address guest side + * @srcport: Source port on guest side + * @dstport: Destination port on guest side + * + * Return: sidx for the destination side of the flow for this packet, or + * FLOW_SIDX_NONE if we couldn't find or create a flow. + */ +flow_sidx_t udp_flow_from_tap(const struct ctx *c, + uint8_t pif, sa_family_t af, + const void *saddr, const void *daddr, + in_port_t srcport, in_port_t dstport, + const struct timespec *now) +{ + const struct flowside *ini; + struct udp_flow *uflow; + union flow *flow; + flow_sidx_t sidx; + + ASSERT(pif == PIF_TAP); + + sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr, + srcport, dstport); + if ((uflow = udp_at_sidx(sidx))) { + uflow->ts = now->tv_sec; + return flow_sidx_opposite(sidx); + } + + if (!(flow = flow_alloc())) { + char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; + + debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu", + pif_name(pif), + inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport, + inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport); + return FLOW_SIDX_NONE; + } + + ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, + daddr, dstport); + + if (inany_is_unspecified(&ini->eaddr) || ini->eport == 0 || + inany_is_unspecified(&ini->oaddr) || ini->oport == 0) { + flow_dbg(flow, "Invalid endpoint on UDP packet"); + flow_alloc_cancel(flow); + return FLOW_SIDX_NONE; + } + + return udp_flow_new(c, flow, now); +} + +/** + * udp_flush_flow() - Flush datagrams that might not be for this flow + * @c: Execution context + * @uflow: Flow to handle + * @sidei: Side of the flow to flush + * @now: Current timestamp + */ +static void udp_flush_flow(const struct ctx *c, + const struct udp_flow *uflow, unsigned sidei, + const struct timespec *now) +{ + /* We don't know exactly where the datagrams will come from, but we know + * they'll have an interface and oport matching this flow */ + udp_sock_fwd(c, uflow->s[sidei], uflow->f.pif[sidei], + uflow->f.side[sidei].oport, now); +} + +/** + * udp_flow_defer() - Deferred per-flow handling (clean up aborted flows) + * @c: Execution context + * @uflow: Flow to handle + * @now: Current timestamp + * + * Return: true if the connection is ready to free, false otherwise + */ +bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow, + const struct timespec *now) +{ + if (uflow->flush0) { + udp_flush_flow(c, uflow, INISIDE, now); + uflow->flush0 = false; + } + if (uflow->flush1) { + udp_flush_flow(c, uflow, TGTSIDE, now); + uflow->flush1 = false; + } + return uflow->closed; +} + +/** + * udp_flow_timer() - Handler for timed events related to a given flow + * @c: Execution context + * @uflow: UDP flow + * @now: Current timestamp + * + * Return: true if the flow is ready to free, false otherwise + */ +bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, + const struct timespec *now) +{ + if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT) + return false; + + udp_flow_close(c, uflow); + return true; +} |