udp_flow.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278

/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright Red Hat
 * Author: David Gibson <david@gibson.dropbear.id.au>
 *
 * UDP flow tracking functions
 */

#include <errno.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <unistd.h>

#include "util.h"
#include "passt.h"
#include "flow_table.h"

#define UDP_CONN_TIMEOUT	180 /* s, timeout for ephemeral or local bind */

/**
 * udp_at_sidx() - Get UDP specific flow at given sidx
 * @sidx:    Flow and side to retrieve
 *
 * Return: UDP specific flow at @sidx, or NULL of @sidx is invalid.  Asserts if
 *         the flow at @sidx is not FLOW_UDP.
 */
struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
{
	union flow *flow = flow_at_sidx(sidx);

	if (!flow)
		return NULL;

	ASSERT(flow->f.type == FLOW_UDP);
	return &flow->udp;
}

/**
 * udp_flow_close() - Close and clean up UDP flow
 * @c:		Execution context
 * @uflow:	UDP flow
 */
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
{
	if (uflow->s[INISIDE] >= 0) {
		/* The listening socket needs to stay in epoll */
		close(uflow->s[INISIDE]);
		uflow->s[INISIDE] = -1;
	}

	if (uflow->s[TGTSIDE] >= 0) {
		/* But the flow specific one needs to be removed */
		epoll_ctl(c->epollfd, EPOLL_CTL_DEL, uflow->s[TGTSIDE], NULL);
		close(uflow->s[TGTSIDE]);
		uflow->s[TGTSIDE] = -1;
	}

	uflow->last_errno = 0;

	flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
	if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
		flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
}

/**
 * udp_flow_new() - Common setup for a new UDP flow
 * @c:		Execution context
 * @flow:	Initiated flow
 * @s_ini:	Initiating socket (or -1)
 * @now:	Timestamp
 *
 * Return: UDP specific flow, if successful, NULL on failure
 */
static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
				int s_ini, const struct timespec *now)
{
	const struct flowside *ini = &flow->f.side[INISIDE];
	struct udp_flow *uflow = NULL;
	const struct flowside *tgt;
	uint8_t tgtpif;

	if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) {
		flow_trace(flow, "Invalid endpoint to initiate UDP flow");
		goto cancel;
	}

	if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
		goto cancel;
	tgtpif = flow->f.pif[TGTSIDE];

	uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
	uflow->ts = now->tv_sec;
	uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;

	if (s_ini >= 0) {
		/* When using auto port-scanning the listening port could go
		 * away, so we need to duplicate the socket
		 */
		uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
		if (uflow->s[INISIDE] < 0) {
			flow_err(uflow,
				 "Couldn't duplicate listening socket: %s",
				 strerror(errno));
			goto cancel;
		}
	}

	if (pif_is_socket(tgtpif)) {
		struct mmsghdr discard[UIO_MAXIOV] = { 0 };
		union {
			flow_sidx_t sidx;
			uint32_t data;
		} fref = {
			.sidx = FLOW_SIDX(flow, TGTSIDE),
		};
		int rc;

		uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY,
						     tgtpif, tgt, fref.data);
		if (uflow->s[TGTSIDE] < 0) {
			flow_dbg(uflow,
				 "Couldn't open socket for spliced flow: %s",
				 strerror(errno));
			goto cancel;
		}

		if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) {
			flow_dbg(uflow,
				 "Couldn't connect flow socket: %s",
				 strerror(errno));
			goto cancel;
		}

		/* It's possible, if unlikely, that we could receive some
		 * unrelated packets in between the bind() and connect() of this
		 * socket.  For now we just discard these.  We could consider
		 * trying to redirect these to an appropriate handler, if we
		 * need to.
		 */
		rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard),
			      MSG_DONTWAIT, NULL);
		if (rc >= ARRAY_SIZE(discard)) {
			flow_dbg(uflow,
				 "Too many (%d) spurious reply datagrams", rc);
			goto cancel;
		} else if (rc > 0) {
			flow_trace(uflow,
				   "Discarded %d spurious reply datagrams", rc);
		} else if (errno != EAGAIN) {
			flow_err(uflow,
				 "Unexpected error discarding datagrams: %s",
				 strerror(errno));
		}
	}

	flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE));

	/* If the target side is a socket, it will be a reply socket that knows
	 * its own flowside.  But if it's tap, then we need to look it up by
	 * hash.
	 */
	if (!pif_is_socket(tgtpif))
		flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE));
	FLOW_ACTIVATE(uflow);

	return FLOW_SIDX(uflow, TGTSIDE);

cancel:
	if (uflow)
		udp_flow_close(c, uflow);
	flow_alloc_cancel(flow);
	return FLOW_SIDX_NONE;
}

/**
 * udp_flow_from_sock() - Find or create UDP flow for "listening" socket
 * @c:		Execution context
 * @ref:	epoll reference of the receiving socket
 * @s_in:	Source socket address, filled in by recvmmsg()
 * @now:	Timestamp
 *
 * #syscalls fcntl arm:fcntl64 ppc64:fcntl64 i686:fcntl64
 *
 * Return: sidx for the destination side of the flow for this packet, or
 *         FLOW_SIDX_NONE if we couldn't find or create a flow.
 */
flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
			       const union sockaddr_inany *s_in,
			       const struct timespec *now)
{
	struct udp_flow *uflow;
	union flow *flow;
	flow_sidx_t sidx;

	ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN);

	sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port);
	if ((uflow = udp_at_sidx(sidx))) {
		uflow->ts = now->tv_sec;
		return flow_sidx_opposite(sidx);
	}

	if (!(flow = flow_alloc())) {
		char sastr[SOCKADDR_STRLEN];

		debug("Couldn't allocate flow for UDP datagram from %s %s",
		      pif_name(ref.udp.pif),
		      sockaddr_ntop(s_in, sastr, sizeof(sastr)));
		return FLOW_SIDX_NONE;
	}

	flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port);
	return udp_flow_new(c, flow, ref.fd, now);
}

/**
 * udp_flow_from_tap() - Find or create UDP flow for tap packets
 * @c:		Execution context
 * @pif:	pif on which the packet is arriving
 * @af:		Address family, AF_INET or AF_INET6
 * @saddr:	Source address on guest side
 * @daddr:	Destination address guest side
 * @srcport:	Source port on guest side
 * @dstport:	Destination port on guest side
 *
 * Return: sidx for the destination side of the flow for this packet, or
 *         FLOW_SIDX_NONE if we couldn't find or create a flow.
 */
flow_sidx_t udp_flow_from_tap(const struct ctx *c,
			      uint8_t pif, sa_family_t af,
			      const void *saddr, const void *daddr,
			      in_port_t srcport, in_port_t dstport,
			      const struct timespec *now)
{
	struct udp_flow *uflow;
	union flow *flow;
	flow_sidx_t sidx;

	ASSERT(pif == PIF_TAP);

	sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
			      srcport, dstport);
	if ((uflow = udp_at_sidx(sidx))) {
		uflow->ts = now->tv_sec;
		return flow_sidx_opposite(sidx);
	}

	if (!(flow = flow_alloc())) {
		char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];

		debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu",
		      pif_name(pif),
		      inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport,
		      inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport);
		return FLOW_SIDX_NONE;
	}

	flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport);

	return udp_flow_new(c, flow, -1, now);
}

/**
 * udp_flow_timer() - Handler for timed events related to a given flow
 * @c:		Execution context
 * @uflow:	UDP flow
 * @now:	Current timestamp
 *
 * Return: true if the flow is ready to free, false otherwise
 */
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
		    const struct timespec *now)
{
	if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT)
		return false;

	udp_flow_close(c, uflow);
	return true;
}