aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--flow.c2
-rw-r--r--udp.c4
-rw-r--r--udp_flow.c75
-rw-r--r--udp_flow.h6
-rw-r--r--udp_internal.h2
5 files changed, 63 insertions, 26 deletions
diff --git a/flow.c b/flow.c
index 8622242..29a83e1 100644
--- a/flow.c
+++ b/flow.c
@@ -850,7 +850,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
closed = icmp_ping_timer(c, &flow->ping, now);
break;
case FLOW_UDP:
- closed = udp_flow_defer(&flow->udp);
+ closed = udp_flow_defer(c, &flow->udp, now);
if (!closed && timer)
closed = udp_flow_timer(c, &flow->udp, now);
break;
diff --git a/udp.c b/udp.c
index d9d2183..ed6edc1 100644
--- a/udp.c
+++ b/udp.c
@@ -698,8 +698,8 @@ static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n,
* @port: Our (local) port number of @s
* @now: Current timestamp
*/
-static void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
- in_port_t port, const struct timespec *now)
+void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
+ in_port_t port, const struct timespec *now)
{
union sockaddr_inany src;
diff --git a/udp_flow.c b/udp_flow.c
index 5afe6e5..75f5a0b 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -9,10 +9,12 @@
#include <fcntl.h>
#include <sys/uio.h>
#include <unistd.h>
+#include <netinet/udp.h>
#include "util.h"
#include "passt.h"
#include "flow_table.h"
+#include "udp_internal.h"
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
@@ -67,16 +69,15 @@ void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
* Return: fd of new socket on success, -ve error code on failure
*/
static int udp_flow_sock(const struct ctx *c,
- const struct udp_flow *uflow, unsigned sidei)
+ struct udp_flow *uflow, unsigned sidei)
{
const struct flowside *side = &uflow->f.side[sidei];
- struct mmsghdr discard[UIO_MAXIOV] = { 0 };
uint8_t pif = uflow->f.pif[sidei];
union {
flow_sidx_t sidx;
uint32_t data;
} fref = { .sidx = FLOW_SIDX(uflow, sidei) };
- int rc, s;
+ int s;
s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data);
if (s < 0) {
@@ -85,30 +86,32 @@ static int udp_flow_sock(const struct ctx *c,
}
if (flowside_connect(c, s, pif, side) < 0) {
- rc = -errno;
+ int rc = -errno;
flow_dbg_perror(uflow, "Couldn't connect flow socket");
return rc;
}
- /* It's possible, if unlikely, that we could receive some unrelated
- * packets in between the bind() and connect() of this socket. For now
- * we just discard these.
+ /* It's possible, if unlikely, that we could receive some packets in
+ * between the bind() and connect() which may or may not be for this
+ * flow. Being UDP we could just discard them, but it's not ideal.
*
- * FIXME: Redirect these to an appropriate handler
+ * There's also a tricky case if a bunch of datagrams for a new flow
+ * arrive in rapid succession, the first going to the original listening
+ * socket and later ones going to this new socket. If we forwarded the
+ * datagrams from the new socket immediately here they would go before
+ * the datagram which established the flow. Again, not strictly wrong
+ * for UDP, but not ideal.
+ *
+ * So, we flag that the new socket is in a transient state where it
+ * might have datagrams for a different flow queued. Before the next
+ * epoll cycle, udp_flow_defer() will flush out any such datagrams, and
+ * thereafter everything on the new socket should be strictly for this
+ * flow.
*/
- rc = recvmmsg(s, discard, ARRAY_SIZE(discard), MSG_DONTWAIT, NULL);
- if (rc >= ARRAY_SIZE(discard)) {
- flow_dbg(uflow, "Too many (%d) spurious reply datagrams", rc);
- return -E2BIG;
- }
-
- if (rc > 0) {
- flow_trace(uflow, "Discarded %d spurious reply datagrams", rc);
- } else if (errno != EAGAIN) {
- rc = -errno;
- flow_perror(uflow, "Unexpected error discarding datagrams");
- return rc;
- }
+ if (sidei)
+ uflow->flush1 = true;
+ else
+ uflow->flush0 = true;
return s;
}
@@ -269,13 +272,41 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
}
/**
+ * udp_flush_flow() - Flush datagrams that might not be for this flow
+ * @c: Execution context
+ * @uflow: Flow to handle
+ * @sidei: Side of the flow to flush
+ * @now: Current timestamp
+ */
+static void udp_flush_flow(const struct ctx *c,
+ const struct udp_flow *uflow, unsigned sidei,
+ const struct timespec *now)
+{
+ /* We don't know exactly where the datagrams will come from, but we know
+ * they'll have an interface and oport matching this flow */
+ udp_sock_fwd(c, uflow->s[sidei], uflow->f.pif[sidei],
+ uflow->f.side[sidei].oport, now);
+}
+
+/**
* udp_flow_defer() - Deferred per-flow handling (clean up aborted flows)
+ * @c: Execution context
* @uflow: Flow to handle
+ * @now: Current timestamp
*
* Return: true if the connection is ready to free, false otherwise
*/
-bool udp_flow_defer(const struct udp_flow *uflow)
+bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
+ const struct timespec *now)
{
+ if (uflow->flush0) {
+ udp_flush_flow(c, uflow, INISIDE, now);
+ uflow->flush0 = false;
+ }
+ if (uflow->flush1) {
+ udp_flush_flow(c, uflow, TGTSIDE, now);
+ uflow->flush1 = false;
+ }
return uflow->closed;
}
diff --git a/udp_flow.h b/udp_flow.h
index bbdeb2a..90d3b29 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -11,6 +11,8 @@
* struct udp_flow - Descriptor for a flow of UDP packets
* @f: Generic flow information
* @closed: Flow is already closed
+ * @flush0: @s[0] may have datagrams queued for other flows
+ * @flush1: @s[1] may have datagrams queued for other flows
* @ts: Activity timestamp
* @s: Socket fd (or -1) for each side of the flow
* @ttl: TTL or hop_limit for both sides
@@ -20,6 +22,7 @@ struct udp_flow {
struct flow_common f;
bool closed :1;
+ bool flush0, flush1 :1;
time_t ts;
int s[SIDES];
uint8_t ttl[SIDES];
@@ -35,7 +38,8 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
in_port_t srcport, in_port_t dstport,
const struct timespec *now);
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
-bool udp_flow_defer(const struct udp_flow *uflow);
+bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
+ const struct timespec *now);
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
const struct timespec *now);
diff --git a/udp_internal.h b/udp_internal.h
index f7d8426..96d11cf 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -28,5 +28,7 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
const struct flowside *toside, size_t dlen,
bool no_udp_csum);
+void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
+ in_port_t port, const struct timespec *now);
#endif /* UDP_INTERNAL_H */