aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r-- epoll_type.h 4
-rw-r--r-- passt.c 6
-rw-r--r-- udp.c 46
-rw-r--r-- udp.h 4
-rw-r--r-- udp_flow.c 32
-rw-r--r-- util.c 2
6 files changed, 41 insertions, 53 deletions
diff --git a/epoll_type.h b/epoll_type.h
index 7f2a121..12ac59b 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -22,8 +22,8 @@ enum epoll_type {
EPOLL_TYPE_TCP_TIMER,
/* UDP "listening" sockets */
EPOLL_TYPE_UDP_LISTEN,
- /* UDP socket for replies on a specific flow */
- EPOLL_TYPE_UDP_REPLY,
+ /* UDP socket for a specific flow */
+ EPOLL_TYPE_UDP,
/* ICMP/ICMPv6 ping sockets */
EPOLL_TYPE_PING,
/* inotify fd watching for end of netns (pasta) */
diff --git a/passt.c b/passt.c
index cd06772..388d10f 100644
--- a/passt.c
+++ b/passt.c
@@ -68,7 +68,7 @@ char *epoll_type_str[] = {
[EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket",
[EPOLL_TYPE_TCP_TIMER] = "TCP timer",
[EPOLL_TYPE_UDP_LISTEN] = "listening UDP socket",
- [EPOLL_TYPE_UDP_REPLY] = "UDP reply socket",
+ [EPOLL_TYPE_UDP] = "UDP flow socket",
[EPOLL_TYPE_PING] = "ICMP/ICMPv6 ping socket",
[EPOLL_TYPE_NSQUIT_INOTIFY] = "namespace inotify watch",
[EPOLL_TYPE_NSQUIT_TIMER] = "namespace timer watch",
@@ -339,8 +339,8 @@ loop:
case EPOLL_TYPE_UDP_LISTEN:
udp_listen_sock_handler(&c, ref, eventmask, &now);
break;
- case EPOLL_TYPE_UDP_REPLY:
- udp_reply_sock_handler(&c, ref, eventmask, &now);
+ case EPOLL_TYPE_UDP:
+ udp_sock_handler(&c, ref, eventmask, &now);
break;
case EPOLL_TYPE_PING:
icmp_sock_handler(&c, ref);
diff --git a/udp.c b/udp.c
index 5a251df..1b3fffd 100644
--- a/udp.c
+++ b/udp.c
@@ -39,27 +39,30 @@
* could receive packets from multiple flows, so we use a hash table match to
* find the specific flow for a datagram.
*
- * When a UDP flow is initiated from a listening socket we take a duplicate of
- * the socket and store it in uflow->s[INISIDE]. This will last for the
- * lifetime of the flow, even if the original listening socket is closed due to
- * port auto-probing. The duplicate is used to deliver replies back to the
- * originating side.
- *
- * Reply sockets
- * =============
+ * Flow sockets
+ * ============
*
- * When a UDP flow targets a socket, we create a "reply" socket in
+ * When a UDP flow targets a socket, we create a "flow" socket in
* uflow->s[TGTSIDE] both to deliver datagrams to the target side and receive
* replies on the target side. This socket is both bound and connected and has
- * EPOLL_TYPE_UDP_REPLY. The connect() means it will only receive datagrams
+ * EPOLL_TYPE_UDP. The connect() means it will only receive datagrams
* associated with this flow, so the epoll reference directly points to the flow
* and we don't need a hash lookup.
*
- * NOTE: it's possible that the reply socket could have a bound address
- * overlapping with an unrelated listening socket. We assume datagrams for the
- * flow will come to the reply socket in preference to a listening socket. The
- * sample program doc/platform-requirements/reuseaddr-priority.c documents and
- * tests that assumption.
+ * When a flow is initiated from a listening socket, we create a "flow" socket
+ * with the same bound address as the listening socket, but also connect()ed to
+ * the flow's peer. This is stored in uflow->s[INISIDE] and will last for the
+ * lifetime of the flow, even if the original listening socket is closed due to
+ * port auto-probing. This socket is used to deliver replies back to the
+ * originating side.
+ *
+ * NOTE: A flow socket can have a bound address overlapping with a listening
+ * socket. That will happen naturally for flows initiated from a socket, but is
+ * also possible (though unlikely) for tap initiated flows, depending on the
+ * source port. We assume datagrams for the flow will come to a connect()ed
+ * socket in preference to a listening socket. The sample program
+ * doc/platform-requirements/reuseaddr-priority.c documents and tests that
+ * assumption.
*
* "Spliced" flows
* ===============
@@ -71,8 +74,7 @@
* actually used; it doesn't make sense for datagrams and instead a pair of
* recvmmsg() and sendmmsg() is used to forward the datagrams.
*
- * Note that a spliced flow will have *both* a duplicated listening socket and a
- * reply socket (see above).
+ * Note that a spliced flow will have two flow sockets (see above).
*/
#include <sched.h>
@@ -557,7 +559,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
}
eh = (const struct errhdr *)CMSG_DATA(hdr);
- if (ref.type == EPOLL_TYPE_UDP_REPLY) {
+ if (ref.type == EPOLL_TYPE_UDP) {
flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
const struct flowside *toside = flowside_at_sidx(sidx);
size_t dlen = rc;
@@ -792,14 +794,14 @@ static bool udp_buf_reply_sock_data(const struct ctx *c,
}
/**
- * udp_reply_sock_handler() - Handle new data from flow specific socket
+ * udp_sock_handler() - Handle new data from flow specific socket
* @c: Execution context
* @ref: epoll reference
* @events: epoll events bitmap
* @now: Current timestamp
*/
-void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
- uint32_t events, const struct timespec *now)
+void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events, const struct timespec *now)
{
struct udp_flow *uflow = udp_at_sidx(ref.flowside);
@@ -807,7 +809,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
if (events & EPOLLERR) {
if (udp_sock_errs(c, ref) < 0) {
- flow_err(uflow, "Unrecoverable error on reply socket");
+ flow_err(uflow, "Unrecoverable error on flow socket");
goto fail;
}
}
diff --git a/udp.h b/udp.h
index a811475..8f8531a 100644
--- a/udp.h
+++ b/udp.h
@@ -11,8 +11,8 @@
void udp_portmap_clear(void);
void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
-void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
- uint32_t events, const struct timespec *now);
+void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events, const struct timespec *now);
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
uint8_t ttl, const struct pool *p, int idx,
diff --git a/udp_flow.c b/udp_flow.c
index 99ae490..a2d417f 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -49,10 +49,7 @@ void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
flow_foreach_sidei(sidei) {
flow_hash_remove(c, FLOW_SIDX(uflow, sidei));
if (uflow->s[sidei] >= 0) {
- /* The listening socket needs to stay in epoll, but the
- * flow specific one needs to be removed */
- if (sidei == TGTSIDE)
- epoll_del(c, uflow->s[sidei]);
+ epoll_del(c, uflow->s[sidei]);
close(uflow->s[sidei]);
uflow->s[sidei] = -1;
}
@@ -81,7 +78,7 @@ static int udp_flow_sock(const struct ctx *c,
} fref = { .sidx = FLOW_SIDX(uflow, sidei) };
int rc, s;
- s = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY, pif, side, fref.data);
+ s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data);
if (s < 0) {
flow_dbg_perror(uflow, "Couldn't open flow specific socket");
return s;
@@ -120,13 +117,12 @@ static int udp_flow_sock(const struct ctx *c,
* udp_flow_new() - Common setup for a new UDP flow
* @c: Execution context
* @flow: Initiated flow
- * @s_ini: Initiating socket (or -1)
* @now: Timestamp
*
* Return: sidx for the new UDP flow, if successful, FLOW_SIDX_NONE on failure
*/
static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
- int s_ini, const struct timespec *now)
+ const struct timespec *now)
{
struct udp_flow *uflow = NULL;
unsigned sidei;
@@ -139,22 +135,12 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0;
- if (s_ini >= 0) {
- /* When using auto port-scanning the listening port could go
- * away, so we need to duplicate the socket
- */
- uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
- if (uflow->s[INISIDE] < 0) {
- flow_perror(uflow,
- "Couldn't duplicate listening socket");
- goto cancel;
- }
+ flow_foreach_sidei(sidei) {
+ if (pif_is_socket(uflow->f.pif[sidei]))
+ if ((uflow->s[sidei] = udp_flow_sock(c, uflow, sidei)) < 0)
+ goto cancel;
}
- if (pif_is_socket(flow->f.pif[TGTSIDE]))
- if ((uflow->s[TGTSIDE] = udp_flow_sock(c, uflow, TGTSIDE)) < 0)
- goto cancel;
-
/* Tap sides always need to be looked up by hash. Socket sides don't
* always, but sometimes do (receiving packets on a socket not specific
* to one flow). Unconditionally hash both sides so all our bases are
@@ -225,7 +211,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
return FLOW_SIDX_NONE;
}
- return udp_flow_new(c, flow, ref.fd, now);
+ return udp_flow_new(c, flow, now);
}
/**
@@ -281,7 +267,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
return FLOW_SIDX_NONE;
}
- return udp_flow_new(c, flow, -1, now);
+ return udp_flow_new(c, flow, now);
}
/**
diff --git a/util.c b/util.c
index b9a3d43..0f68cf5 100644
--- a/util.c
+++ b/util.c
@@ -71,7 +71,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
case EPOLL_TYPE_UDP_LISTEN:
freebind = c->freebind;
/* fallthrough */
- case EPOLL_TYPE_UDP_REPLY:
+ case EPOLL_TYPE_UDP:
proto = IPPROTO_UDP;
socktype = SOCK_DGRAM | SOCK_NONBLOCK;
break;