udp: Handle partial failure in sendmmsg() to UNIX domain socket

Similarly to the handling introduced by commit "tcp: Proper error handling for sendmmsg() to UNIX domain socket" for TCP, we need to deal with partial sendmmsg() failures for UDP as well.

Here, we can lose messages, but we need to make sure that the last message is delivered completely, otherwise qemu will fail to reassemble further packets.

For UDP, this is somewhat complicated by the fact that one message might include multiple datagrams, and we need to respect message boundaries: go through headers, and calculate what we need to re-send, if anything.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
author: Stefano Brivio <sbrivio@redhat.com> 2021-08-26 21:10:48 +0200
committer: Stefano Brivio <sbrivio@redhat.com> 2021-09-01 17:00:27 +0200
commit: 77d4efa23620700b369270f086facdf87b6194b4 (patch)
tree: deef7d52189b5f052240db153110a9bb236cb858 /udp.c
parent: cd04d238b2d98688962ee372232b04466dc8f938 (diff)
download: passt-77d4efa23620700b369270f086facdf87b6194b4.tar
passt-77d4efa23620700b369270f086facdf87b6194b4.tar.gz
passt-77d4efa23620700b369270f086facdf87b6194b4.tar.bz2
passt-77d4efa23620700b369270f086facdf87b6194b4.tar.lz
passt-77d4efa23620700b369270f086facdf87b6194b4.tar.xz
passt-77d4efa23620700b369270f086facdf87b6194b4.tar.zst
passt-77d4efa23620700b369270f086facdf87b6194b4.zip
1 files changed, 60 insertions, 20 deletions
diff --git a/udp.c b/udp.c
index 2178082..097e263 100644
--- a/udp.c
+++ b/udp.c
@@ -644,9 +644,11 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      struct timespec *now)
 {
-	int i, iov_in_msg, msg_i = 0;
+	int iov_in_msg, msg_i = 0, ret;
+	ssize_t n, msglen, missing;
+	struct mmsghdr *tap_mmh;
 	struct msghdr *cur_mh;
-	ssize_t n, msglen;
+	unsigned int i;
 
 	if (events == EPOLLERR)
 		return;
@@ -664,7 +666,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		cur_mh = &udp6_l2_mh_tap[msg_i].msg_hdr;
 		cur_mh->msg_iov = &udp6_l2_iov_tap[0];
 		msg_i = msglen = iov_in_msg = 0;
-		/* TODO: Explicit AVX2 vectorisation of this loop */
+
 		for (i = 0; i < n; i++) {
 			struct udp6_l2_buf_t *b = &udp6_l2_buf[i];
 			size_t ip_len, iov_len;
@@ -725,7 +727,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 			udp6_l2_iov_tap[i].iov_len = iov_len;
 
 			/* With bigger messages, qemu closes the connection. */
-			if (iov_in_msg && msglen + iov_len > SHRT_MAX) {
+			if (iov_in_msg && msglen + iov_len > USHRT_MAX) {
 				cur_mh->msg_iovlen = iov_in_msg;
 
 				cur_mh = &udp6_l2_mh_tap[++msg_i].msg_hdr;
@@ -737,14 +739,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 			iov_in_msg++;
 		}
 
-		if (c->mode == MODE_PASTA)
-			return;
-
-		cur_mh->msg_iovlen = iov_in_msg;
-
-		sendmmsg(c->fd_tap, udp6_l2_mh_tap, msg_i + 1,
-			 MSG_NOSIGNAL | MSG_DONTWAIT);
-		pcapmm(udp6_l2_mh_tap, msg_i + 1);
+		tap_mmh = udp6_l2_mh_tap;
 	} else {
 		n = recvmmsg(ref.s, udp4_l2_mh_sock, UDP_TAP_FRAMES, 0, NULL);
 		if (n <= 0)
@@ -753,7 +748,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		cur_mh = &udp4_l2_mh_tap[msg_i].msg_hdr;
 		cur_mh->msg_iov = &udp4_l2_iov_tap[0];
 		msg_i = msglen = iov_in_msg = 0;
-		/* TODO: Explicit AVX2 vectorisation of this loop */
+
 		for (i = 0; i < n; i++) {
 			struct udp4_l2_buf_t *b = &udp4_l2_buf[i];
 			size_t ip_len, iov_len;
@@ -801,7 +796,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 			udp4_l2_iov_tap[i].iov_len = iov_len;
 
 			/* With bigger messages, qemu closes the connection. */
-			if (iov_in_msg && msglen + iov_len > SHRT_MAX) {
+			if (iov_in_msg && msglen + iov_len > USHRT_MAX) {
 				cur_mh->msg_iovlen = iov_in_msg;
 
 				cur_mh = &udp4_l2_mh_tap[++msg_i].msg_hdr;
@@ -813,15 +808,60 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 			iov_in_msg++;
 		}
 
-		if (c->mode == MODE_PASTA)
-			return;
+		tap_mmh = udp4_l2_mh_tap;
+	}
 
-		cur_mh->msg_iovlen = iov_in_msg;
+	if (c->mode == MODE_PASTA)
+		return;
 
-		sendmmsg(c->fd_tap, udp4_l2_mh_tap, msg_i + 1,
-			 MSG_NOSIGNAL | MSG_DONTWAIT);
-		pcapmm(udp4_l2_mh_tap, msg_i + 1);
+	cur_mh->msg_iovlen = iov_in_msg;
+	ret = sendmmsg(c->fd_tap, tap_mmh, msg_i + 1,
+		       MSG_NOSIGNAL | MSG_DONTWAIT);
+	if (ret <= 0)
+		return;
+
+	/* If we lose some messages to sendmmsg() here, fine, it's UDP. However,
+	 * the last message needs to be delivered completely, otherwise qemu
+	 * will fail to reassemble the next message and close the connection. Go
+	 * through headers from the last sent message, counting bytes, and, if
+	 * and as soon as we see more bytes than sendmmsg() sent, re-send the
+	 * rest with a blocking call.
+	 *
+	 * In pictures, given this example:
+	 *
+	 *				 	iov #0  iov #1  iov #2  iov #3
+	 * tap_mmh[ret - 1].msg_hdr:		....    ......  .....   ......
+	 * tap_mmh[ret - 1].msg_len:	7	....    ...
+	 *
+	 * when 'msglen' reaches:	10		      ^
+	 * and 'missing' below is:	3	           ---
+	 *
+	 * re-send everything from here:		   ^--  -----   ------
+	 */
+	cur_mh = &tap_mmh[ret - 1].msg_hdr;
+	for (i = 0, msglen = 0; i < cur_mh->msg_iovlen; i++) {
+		if (missing <= 0) {
+			msglen += cur_mh->msg_iov[i].iov_len;
+			missing = msglen - tap_mmh[ret - 1].msg_len;
+		}
+
+		if (missing) {
+			uint8_t **iov_base;
+			int first_offset;
+
+			iov_base = (uint8_t **)&cur_mh->msg_iov[i].iov_base;
+			first_offset = cur_mh->msg_iov[i].iov_len - missing;
+			*iov_base += first_offset;
+			cur_mh->msg_iov[i].iov_len = missing;
+
+			cur_mh->msg_iov = &cur_mh->msg_iov[i];
+
+			sendmsg(c->fd_tap, cur_mh, MSG_NOSIGNAL);
+			break;
+		}
 	}
+
+	pcapmm(tap_mmh, ret);
 }
 
 /**
author	Stefano Brivio <sbrivio@redhat.com>	2021-08-26 21:10:48 +0200
committer	Stefano Brivio <sbrivio@redhat.com>	2021-09-01 17:00:27 +0200
commit	77d4efa23620700b369270f086facdf87b6194b4 (patch)
tree	deef7d52189b5f052240db153110a9bb236cb858 /udp.c
parent	cd04d238b2d98688962ee372232b04466dc8f938 (diff)
download	passt-77d4efa23620700b369270f086facdf87b6194b4.tar passt-77d4efa23620700b369270f086facdf87b6194b4.tar.gz passt-77d4efa23620700b369270f086facdf87b6194b4.tar.bz2 passt-77d4efa23620700b369270f086facdf87b6194b4.tar.lz passt-77d4efa23620700b369270f086facdf87b6194b4.tar.xz passt-77d4efa23620700b369270f086facdf87b6194b4.tar.zst passt-77d4efa23620700b369270f086facdf87b6194b4.zip