aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--udp.c148
1 files changed, 87 insertions, 61 deletions
diff --git a/udp.c b/udp.c
index 2d73408..3bedfce 100644
--- a/udp.c
+++ b/udp.c
@@ -198,6 +198,7 @@ static struct ethhdr udp6_eth_hdr;
* @ip4h: Pre-filled IPv4 header (except for tot_len and saddr)
* @taph: Tap backend specific header
* @s_in: Source socket address, filled in by recvmmsg()
+ * @splicesrc: Source port for splicing, or -1 if not spliceable
*/
static struct udp_meta_t {
struct ipv6hdr ip6h;
@@ -205,6 +206,7 @@ static struct udp_meta_t {
struct tap_hdr taph;
union sockaddr_inany s_in;
+ int splicesrc;
}
#ifdef __AVX2__
__attribute__ ((aligned(32)))
@@ -491,28 +493,33 @@ static int udp_mmh_splice_port(union udp_epoll_ref uref,
}
/**
- * udp_splice_sendfrom() - Send datagrams from given port to given port
+ * udp_splice_send() - Send datagrams from socket to socket
* @c: Execution context
* @start: Index of first datagram in udp[46]_l2_buf
- * @n: Number of datagrams to send
- * @src: Datagrams will be sent from this port (on origin side)
- * @dst: Datagrams will be send to this port (on destination side)
- * @from_pif: pif from which the packet originated
- * @v6: Send as IPv6?
- * @allow_new: If true create sending socket if needed, if false discard
- * if no sending socket is available
+ * @n: Total number of datagrams in udp[46]_l2_buf pool
+ * @dst: Datagrams will be sent to this port (on destination side)
+ * @uref: UDP epoll reference for origin socket
* @now: Timestamp
+ *
+ * This consumes as many datagrams as are sendable via a single socket. It
+ * requires that udp_meta[@start].splicesrc is initialised, and will initialise
+ * udp_meta[].splicesrc for each datagram it consumes *and one more* (if
+ * present).
+ *
+ * Return: Number of datagrams forwarded
*/
-static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
- in_port_t src, in_port_t dst, uint8_t from_pif,
- bool v6, bool allow_new,
+static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n,
+ in_port_t dst, union udp_epoll_ref uref,
const struct timespec *now)
{
+ in_port_t src = udp_meta[start].splicesrc;
struct mmsghdr *mmh_recv, *mmh_send;
- unsigned int i;
+ unsigned int i = start;
int s;
- if (v6) {
+ ASSERT(udp_meta[start].splicesrc >= 0);
+
+ if (uref.v6) {
mmh_recv = udp6_l2_mh_sock;
mmh_send = udp6_mh_splice;
} else {
@@ -520,40 +527,48 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
mmh_send = udp4_mh_splice;
}
- if (from_pif == PIF_SPLICE) {
+ do {
+ mmh_send[i].msg_hdr.msg_iov->iov_len = mmh_recv[i].msg_len;
+
+ if (++i >= n)
+ break;
+
+ udp_meta[i].splicesrc = udp_mmh_splice_port(uref, &mmh_recv[i]);
+ } while (udp_meta[i].splicesrc == src);
+
+ if (uref.pif == PIF_SPLICE) {
src += c->udp.fwd_in.rdelta[src];
- s = udp_splice_init[v6][src].sock;
- if (s < 0 && allow_new)
- s = udp_splice_new(c, v6, src, false);
+ s = udp_splice_init[uref.v6][src].sock;
+ if (s < 0 && uref.orig)
+ s = udp_splice_new(c, uref.v6, src, false);
if (s < 0)
- return;
+ goto out;
- udp_splice_ns[v6][dst].ts = now->tv_sec;
- udp_splice_init[v6][src].ts = now->tv_sec;
+ udp_splice_ns[uref.v6][dst].ts = now->tv_sec;
+ udp_splice_init[uref.v6][src].ts = now->tv_sec;
} else {
- ASSERT(from_pif == PIF_HOST);
+ ASSERT(uref.pif == PIF_HOST);
src += c->udp.fwd_out.rdelta[src];
- s = udp_splice_ns[v6][src].sock;
- if (s < 0 && allow_new) {
+ s = udp_splice_ns[uref.v6][src].sock;
+ if (s < 0 && uref.orig) {
struct udp_splice_new_ns_arg arg = {
- c, v6, src, -1,
+ c, uref.v6, src, -1,
};
NS_CALL(udp_splice_new_ns, &arg);
s = arg.s;
}
if (s < 0)
- return;
+ goto out;
- udp_splice_init[v6][dst].ts = now->tv_sec;
- udp_splice_ns[v6][src].ts = now->tv_sec;
+ udp_splice_init[uref.v6][dst].ts = now->tv_sec;
+ udp_splice_ns[uref.v6][src].ts = now->tv_sec;
}
- for (i = start; i < start + n; i++)
- mmh_send[i].msg_hdr.msg_iov->iov_len = mmh_recv[i].msg_len;
-
- sendmmsg(s, mmh_send + start, n, MSG_NOSIGNAL);
+ sendmmsg(s, mmh_send + start, i - start, MSG_NOSIGNAL);
+out:
+ return i - start;
}
/**
@@ -688,31 +703,41 @@ static size_t udp_update_hdr6(const struct ctx *c,
* udp_tap_send() - Prepare UDP datagrams and send to tap interface
* @c: Execution context
* @start: Index of first datagram in udp[46]_l2_buf pool
- * @n: Number of datagrams to send
- * @dstport: Destination port number
- * @v6: True if using IPv6
+ * @n: Total number of datagrams in udp[46]_l2_buf pool
+ * @dstport: Destination port number on destination side
+ * @uref: UDP epoll reference for origin socket
* @now: Current timestamp
*
- * Return: size of tap frame with headers
+ * This consumes as many frames as are sendable via tap. It requires that
+ * udp_meta[@start].splicesrc is initialised, and will initialise
+ * udp_meta[].splicesrc for each frame it consumes *and one more* (if present).
+ *
+ * Return: Number of frames sent via tap
*/
-static void udp_tap_send(const struct ctx *c,
- unsigned int start, unsigned int n,
- in_port_t dstport, bool v6, const struct timespec *now)
+static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
+ in_port_t dstport, union udp_epoll_ref uref,
+ const struct timespec *now)
{
struct iovec (*tap_iov)[UDP_NUM_IOVS];
- size_t i;
+ struct mmsghdr *mmh_recv;
+ size_t i = start;
- if (v6)
+ ASSERT(udp_meta[start].splicesrc == -1);
+
+ if (uref.v6) {
tap_iov = udp6_l2_iov_tap;
- else
+ mmh_recv = udp6_l2_mh_sock;
+ } else {
+ mmh_recv = udp4_l2_mh_sock;
tap_iov = udp4_l2_iov_tap;
+ }
- for (i = start; i < start + n; i++) {
+ do {
struct udp_payload_t *bp = &udp_payload[i];
struct udp_meta_t *bm = &udp_meta[i];
size_t l4len;
- if (v6) {
+ if (uref.v6) {
l4len = udp_update_hdr6(c, &bm->ip6h,
&bm->s_in.sa6, bp, dstport,
udp6_l2_mh_sock[i].msg_len, now);
@@ -726,9 +751,15 @@ static void udp_tap_send(const struct ctx *c,
sizeof(udp4_eth_hdr));
}
tap_iov[i][UDP_IOV_PAYLOAD].iov_len = l4len;
- }
- tap_send_frames(c, &tap_iov[start][0], UDP_NUM_IOVS, n);
+ if (++i >= n)
+ break;
+
+ udp_meta[i].splicesrc = udp_mmh_splice_port(uref, &mmh_recv[i]);
+ } while (udp_meta[i].splicesrc == -1);
+
+ tap_send_frames(c, &tap_iov[start][0], UDP_NUM_IOVS, i - start);
+ return i - start;
}
/**
@@ -777,24 +808,19 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
if (n <= 0)
return;
+ /* We divide things into batches based on how we need to send them,
+ * determined by udp_meta[i].splicesrc. To avoid either two passes
+ * through the array, or recalculating splicesrc for a single entry, we
+ * have to populate it one entry *ahead* of the loop counter (if
+ * present). So we fill in entry 0 before the loop, then udp_*_send()
+ * populate one entry past where they consume.
+ */
+ udp_meta[0].splicesrc = udp_mmh_splice_port(ref.udp, mmh_recv);
for (i = 0; i < n; i += m) {
- int splicefrom = -1;
-
- splicefrom = udp_mmh_splice_port(ref.udp, mmh_recv + i);
-
- for (m = 1; i + m < n; m++) {
- int p;
-
- p = udp_mmh_splice_port(ref.udp, mmh_recv + i + m);
- if (p != splicefrom)
- break;
- }
-
- if (splicefrom >= 0)
- udp_splice_sendfrom(c, i, m, splicefrom, dstport,
- ref.udp.pif, v6, ref.udp.orig, now);
+ if (udp_meta[i].splicesrc >= 0)
+ m = udp_splice_send(c, i, n, dstport, ref.udp, now);
else
- udp_tap_send(c, i, m, dstport, v6, now);
+ m = udp_tap_send(c, i, n, dstport, ref.udp, now);
}
}