From 08ea3cc581beed16afff3fa934f31cbdb82cbb95 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Wed, 27 Nov 2024 14:54:07 +1100
Subject: tcp: Pass TCP header and payload separately to tcp_fill_headers[46]()

At the moment these take separate pointers to the tap-specific and IP
headers, but expect the TCP header and payload as a single tcp_payload_t.
As well as being slightly inconsistent, this involves some iffy
pointer shenanigans when called on the flags path with a tcp_flags_t
instead of a tcp_payload_t.

More importantly, it's inconvenient for the upcoming vhost-user case, where
the TCP header and payload might not be contiguous.  Furthermore, the
payload itself might not be contiguous.

So, pass the TCP header as its own pointer, and the TCP payload as an IO
vector.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
 iov.c          |  1 -
 tcp.c          | 50 +++++++++++++++++-----------------------------
 tcp_buf.c      | 22 ++++++++++----------
 tcp_internal.h |  4 ++--
 tcp_vu.c       | 63 ++++++++++++++++++++++++++++++++--------------------------
 5 files changed, 65 insertions(+), 75 deletions(-)

diff --git a/iov.c b/iov.c
index 2f7be15..3b12272 100644
--- a/iov.c
+++ b/iov.c
@@ -236,7 +236,6 @@ void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
  *	    overruns the IO vector, is not contiguous or doesn't have the
  *	    requested alignment.
  */
-/* cppcheck-suppress unusedFunction */
 void *iov_remove_header_(struct iov_tail *tail, size_t len, size_t align)
 {
 	char *p = iov_peek_header_(tail, len, align);
diff --git a/tcp.c b/tcp.c
index 5c40e18..2f900fc 100644
--- a/tcp.c
+++ b/tcp.c
@@ -909,21 +909,21 @@ static void tcp_fill_header(struct tcphdr *th,
  * @conn:		Connection pointer
  * @taph:		tap backend specific header
  * @iph:		Pointer to IPv4 header
- * @bp:			Pointer to TCP header followed by TCP payload
- * @dlen:		TCP payload length
+ * @th:			Pointer to TCP header
+ * @payload:		TCP payload
  * @check:		Checksum, if already known
  * @seq:		Sequence number for this segment
  * @no_tcp_csum:	Do not set TCP checksum
  */
 void tcp_fill_headers4(const struct tcp_tap_conn *conn,
 		       struct tap_hdr *taph, struct iphdr *iph,
-		       struct tcp_payload_t *bp, size_t dlen,
+		       struct tcphdr *th, struct iov_tail *payload,
 		       const uint16_t *check, uint32_t seq, bool no_tcp_csum)
 {
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *src4 = inany_v4(&tapside->oaddr);
 	const struct in_addr *dst4 = inany_v4(&tapside->eaddr);
-	size_t l4len = dlen + sizeof(bp->th);
+	size_t l4len = iov_tail_size(payload) + sizeof(*th);
 	size_t l3len = l4len + sizeof(*iph);
 
 	ASSERT(src4 && dst4);
@@ -935,19 +935,12 @@ void tcp_fill_headers4(const struct tcp_tap_conn *conn,
 	iph->check = check ? *check :
 			     csum_ip4_header(l3len, IPPROTO_TCP, *src4, *dst4);
 
-	tcp_fill_header(&bp->th, conn, seq);
+	tcp_fill_header(th, conn, seq);
 
-	if (no_tcp_csum) {
-		bp->th.check = 0;
-	} else {
-		const struct iovec iov = {
-			.iov_base = bp->data,
-			.iov_len = dlen,
-		};
-		struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
-
-		tcp_update_check_tcp4(iph, &bp->th, &payload);
-	}
+	if (no_tcp_csum)
+		th->check = 0;
+	else
+		tcp_update_check_tcp4(iph, th, payload);
 
 	tap_hdr_update(taph, l3len + sizeof(struct ethhdr));
 }
@@ -957,19 +950,19 @@ void tcp_fill_headers4(const struct tcp_tap_conn *conn,
  * @conn:		Connection pointer
  * @taph:		tap backend specific header
  * @ip6h:		Pointer to IPv6 header
- * @bp:			Pointer to TCP header followed by TCP payload
- * @dlen:		TCP payload length
+ * @th:			Pointer to TCP header
+ * @payload:		TCP payload
  * @check:		Checksum, if already known
  * @seq:		Sequence number for this segment
  * @no_tcp_csum:	Do not set TCP checksum
  */
 void tcp_fill_headers6(const struct tcp_tap_conn *conn,
 		       struct tap_hdr *taph, struct ipv6hdr *ip6h,
-		       struct tcp_payload_t *bp, size_t dlen,
+		       struct tcphdr *th, struct iov_tail *payload,
 		       uint32_t seq, bool no_tcp_csum)
 {
+	size_t l4len = iov_tail_size(payload) + sizeof(*th);
 	const struct flowside *tapside = TAPFLOW(conn);
-	size_t l4len = dlen + sizeof(bp->th);
 
 	ip6h->payload_len = htons(l4len);
 	ip6h->saddr = tapside->oaddr.a6;
@@ -983,19 +976,12 @@ void tcp_fill_headers6(const struct tcp_tap_conn *conn,
 	ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff;
 	ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff;
 
-	tcp_fill_header(&bp->th, conn, seq);
+	tcp_fill_header(th, conn, seq);
 
-	if (no_tcp_csum) {
-		bp->th.check = 0;
-	} else {
-		const struct iovec iov = {
-			.iov_base = bp->data,
-			.iov_len = dlen,
-		};
-		struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
-
-		tcp_update_check_tcp6(ip6h, &bp->th, &payload);
-	}
+	if (no_tcp_csum)
+		th->check = 0;
+	else
+		tcp_update_check_tcp6(ip6h, th, payload);
 
 	tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));
 }
diff --git a/tcp_buf.c b/tcp_buf.c
index 0946cd5..830c23d 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -151,29 +151,27 @@ void tcp_payload_flush(const struct ctx *c)
  * tcp_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
  * @conn:	Connection pointer
  * @iov:	Pointer to an array of iovec of TCP pre-cooked buffers
- * @dlen:	TCP payload length
  * @check:	Checksum, if already known
  * @seq:	Sequence number for this segment
  * @no_tcp_csum: Do not set TCP checksum
  */
 static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
-				    struct iovec *iov, size_t dlen,
-				    const uint16_t *check, uint32_t seq,
-				    bool no_tcp_csum)
+				    struct iovec *iov, const uint16_t *check,
+				    uint32_t seq, bool no_tcp_csum)
 {
+	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
+	struct tcphdr *th = IOV_REMOVE_HEADER(&tail, struct tcphdr);
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
 
 	if (a4) {
 		tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
-				  iov[TCP_IOV_IP].iov_base,
-				  iov[TCP_IOV_PAYLOAD].iov_base, dlen,
-				  check, seq, no_tcp_csum);
+				  iov[TCP_IOV_IP].iov_base, th,
+				  &tail,  check, seq, no_tcp_csum);
 	} else {
 		tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
-				  iov[TCP_IOV_IP].iov_base,
-				  iov[TCP_IOV_PAYLOAD].iov_base, dlen,
-				  seq, no_tcp_csum);
+				  iov[TCP_IOV_IP].iov_base, th,
+				  &tail, seq, no_tcp_csum);
 	}
 }
 
@@ -213,7 +211,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	tcp_payload_used++;
 	l4len = optlen + sizeof(struct tcphdr);
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-	tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
+	tcp_l2_buf_fill_headers(conn, iov, NULL, seq, false);
 
 	if (flags & DUP_ACK) {
 		struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
@@ -270,7 +268,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	payload->th.th_flags = 0;
 	payload->th.ack = 1;
 	iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
-	tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
+	tcp_l2_buf_fill_headers(conn, iov, check, seq, false);
 	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
 		tcp_payload_flush(c);
 }
diff --git a/tcp_internal.h b/tcp_internal.h
index 744c5c0..9732b5b 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -168,11 +168,11 @@ void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, struct tcphdr *th,
 			   struct iov_tail *payload);
 void tcp_fill_headers4(const struct tcp_tap_conn *conn,
 		       struct tap_hdr *taph, struct iphdr *iph,
-		       struct tcp_payload_t *bp, size_t dlen,
+		       struct tcphdr *th, struct iov_tail *payload,
 		       const uint16_t *check, uint32_t seq, bool no_tcp_csum);
 void tcp_fill_headers6(const struct tcp_tap_conn *conn,
 		       struct tap_hdr *taph, struct ipv6hdr *ip6h,
-		       struct tcp_payload_t *bp, size_t dlen,
+		       struct tcphdr *th, struct iov_tail *payload,
 		       uint32_t seq, bool no_tcp_csum);
 
 int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
diff --git a/tcp_vu.c b/tcp_vu.c
index 134650e..470649e 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -104,10 +104,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	const struct flowside *tapside = TAPFLOW(conn);
 	size_t optlen, hdrlen;
 	struct vu_virtq_element flags_elem[2];
-	struct tcp_payload_t *payload;
 	struct ipv6hdr *ip6h = NULL;
 	struct iovec flags_iov[2];
+	struct tcp_syn_opts *opts;
 	struct iphdr *iph = NULL;
+	struct iov_tail payload;
+	struct tcphdr *th;
 	struct ethhdr *eh;
 	uint32_t seq;
 	int elem_cnt;
@@ -139,35 +141,35 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		iph = vu_ip(flags_elem[0].in_sg[0].iov_base);
 		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
 
-		payload = vu_payloadv4(flags_elem[0].in_sg[0].iov_base);
+		th = vu_payloadv4(flags_elem[0].in_sg[0].iov_base);
 	} else {
 		eh->h_proto = htons(ETH_P_IPV6);
 
 		ip6h = vu_ip(flags_elem[0].in_sg[0].iov_base);
 		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
-		payload = vu_payloadv6(flags_elem[0].in_sg[0].iov_base);
+		th = vu_payloadv6(flags_elem[0].in_sg[0].iov_base);
 	}
 
-	memset(&payload->th, 0, sizeof(payload->th));
-	payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
-	payload->th.ack = 1;
+	memset(th, 0, sizeof(*th));
+	th->doff = sizeof(*th) / 4;
+	th->ack = 1;
 
 	seq = conn->seq_to_tap;
-	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
-				(struct tcp_syn_opts *)payload->data,
-				&optlen);
+	opts = (struct tcp_syn_opts *)(th + 1);
+	ret = tcp_prepare_flags(c, conn, flags, th, opts, &optlen);
 	if (ret <= 0) {
 		vu_queue_rewind(vq, 1);
 		return ret;
 	}
 
 	flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
+	payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
 
 	if (CONN_V4(conn)) {
-		tcp_fill_headers4(conn, NULL, iph, payload, optlen, NULL, seq,
-				  true);
+		tcp_fill_headers4(conn, NULL, iph, th, &payload,
+				  NULL, seq, true);
 	} else {
-		tcp_fill_headers6(conn, NULL, ip6h, payload, optlen, seq, true);
+		tcp_fill_headers6(conn, NULL, ip6h, th, &payload, seq, true);
 	}
 
 	if (*c->pcap) {
@@ -317,23 +319,28 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
  * tcp_vu_prepare() - Prepare the frame header
  * @c:		Execution context
  * @conn:	Connection pointer
- * @first:	Pointer to the array of IO vectors
- * @dlen:	Packet data length
+ * @iov:	Pointer to the array of IO vectors
+ * @iov_cnt:	Number of entries in @iov
  * @check:	Checksum, if already known
  */
-static void tcp_vu_prepare(const struct ctx *c,
-			   struct tcp_tap_conn *conn, char *base,
-			   size_t dlen, const uint16_t **check)
+static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
+			   struct iovec *iov, size_t iov_cnt,
+			   const uint16_t **check)
 {
 	const struct flowside *toside = TAPFLOW(conn);
-	struct tcp_payload_t *payload;
+	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
+	size_t hdrlen = tcp_vu_hdrlen(v6);
+	struct iov_tail payload = IOV_TAIL(iov, iov_cnt, hdrlen);
+	char *base = iov[0].iov_base;
 	struct ipv6hdr *ip6h = NULL;
 	struct iphdr *iph = NULL;
+	struct tcphdr *th;
 	struct ethhdr *eh;
 
 	/* we guess the first iovec provided by the guest can embed
 	 * all the headers needed by L2 frame
 	 */
+	ASSERT(iov[0].iov_len >= hdrlen);
 
 	eh = vu_eth(base);
 
@@ -342,31 +349,31 @@ static void tcp_vu_prepare(const struct ctx *c,
 
 	/* initialize header */
 
-	if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
+	if (!v6) {
 		eh->h_proto = htons(ETH_P_IP);
 
 		iph = vu_ip(base);
 		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
-		payload = vu_payloadv4(base);
+		th = vu_payloadv4(base);
 	} else {
 		eh->h_proto = htons(ETH_P_IPV6);
 
 		ip6h = vu_ip(base);
 		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
 
-		payload = vu_payloadv6(base);
+		th = vu_payloadv6(base);
 	}
 
-	memset(&payload->th, 0, sizeof(payload->th));
-	payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
-	payload->th.ack = 1;
+	memset(th, 0, sizeof(*th));
+	th->doff = sizeof(*th) / 4;
+	th->ack = 1;
 
-	if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
-		tcp_fill_headers4(conn, NULL, iph, payload, dlen,
+	if (!v6) {
+		tcp_fill_headers4(conn, NULL, iph, th, &payload,
 				  *check, conn->seq_to_tap, true);
 		*check = &iph->check;
 	} else {
-		tcp_fill_headers6(conn, NULL, ip6h, payload, dlen,
+		tcp_fill_headers6(conn, NULL, ip6h, th, &payload,
 				  conn->seq_to_tap, true);
 	}
 }
@@ -478,7 +485,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 		if (i + 1 == head_cnt)
 			check = NULL;
 
-		tcp_vu_prepare(c, conn, iov->iov_base, dlen, &check);
+		tcp_vu_prepare(c, conn, iov, buf_cnt, &check);
 
 		if (*c->pcap) {
 			tcp_vu_update_check(tapside, iov, buf_cnt);
-- 
cgit v1.2.3