1 files changed, 274 insertions, 307 deletions
diff --git a/tcp.c b/tcp.c
index b65ddeb..21d0af0 100644
--- a/tcp.c
+++ b/tcp.c
@@ -290,6 +290,7 @@
 
 #include "checksum.h"
 #include "util.h"
+#include "iov.h"
 #include "ip.h"
 #include "passt.h"
 #include "tap.h"
@@ -318,39 +319,14 @@
 
 /* MSS rounding: see SET_MSS() */
 #define MSS_DEFAULT			536
-
-struct tcp4_l2_head {	/* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
-#ifdef __AVX2__
-	uint8_t pad[26];
-#else
-	uint8_t pad[2];
-#endif
-	struct tap_hdr taph;
-	struct iphdr iph;
-	struct tcphdr th;
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
-struct tcp6_l2_head {	/* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
-#ifdef __AVX2__
-	uint8_t pad[14];
-#else
-	uint8_t pad[2];
-#endif
-	struct tap_hdr taph;
-	struct ipv6hdr ip6h;
-	struct tcphdr th;
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
-#define MSS4	ROUND_DOWN(USHRT_MAX - sizeof(struct tcp4_l2_head), 4)
-#define MSS6	ROUND_DOWN(USHRT_MAX - sizeof(struct tcp6_l2_head), 4)
+#define MSS4				ROUND_DOWN(IP_MAX_MTU -		   \
+						   sizeof(struct tcphdr) - \
+						   sizeof(struct iphdr),   \
+						   sizeof(uint32_t))
+#define MSS6				ROUND_DOWN(IP_MAX_MTU -		   \
+						   sizeof(struct tcphdr) - \
+						   sizeof(struct ipv6hdr), \
+						   sizeof(uint32_t))
 
 #define WINDOW_DEFAULT			14600		/* RFC 6928 */
 #ifdef HAS_SND_WND
@@ -445,133 +421,107 @@ struct tcp_buf_seq_update {
 };
 
 /* Static buffers */
-
 /**
- * tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
- * @pad:	Align TCP header to 32 bytes, for AVX2 checksum calculation only
- * @taph:	Tap-level headers (partially pre-filled)
- * @iph:	Pre-filled IP header (except for tot_len and saddr)
- * @uh:		Headroom for TCP header
- * @data:	Storage for TCP payload
+ * struct tcp_payload_t - TCP header and data to send segments with payload
+ * @th:		TCP header
+ * @data:	TCP data
  */
-static struct tcp4_l2_buf_t {
-#ifdef __AVX2__
-	uint8_t pad[26];	/* 0, align th to 32 bytes */
-#else
-	uint8_t pad[2];		/*	align iph to 4 bytes	0 */
-#endif
-	struct tap_hdr taph;	/* 26				2 */
-	struct iphdr iph;	/* 44				20 */
-	struct tcphdr th;	/* 64				40 */
-	uint8_t data[MSS4];	/* 84				60 */
-				/* 65536			65532 */
+struct tcp_payload_t {
+	struct tcphdr th;
+	uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)];
 #ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
+} __attribute__ ((packed, aligned(32)));    /* For AVX2 checksum routines */
 #else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
+} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
-tcp4_l2_buf[TCP_FRAMES_MEM];
-
-static struct tcp_buf_seq_update tcp4_l2_buf_seq_update[TCP_FRAMES_MEM];
-
-static unsigned int tcp4_l2_buf_used;
 
 /**
- * tcp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections
- * @pad:	Align IPv6 header for checksum calculation to 32B (AVX2) or 4B
- * @taph:	Tap-level headers (partially pre-filled)
- * @ip6h:	Pre-filled IP header (except for payload_len and addresses)
- * @th:		Headroom for TCP header
- * @data:	Storage for TCP payload
+ * struct tcp_flags_t - TCP header and data to send zero-length
+ *                      segments (flags)
+ * @th:		TCP header
+ * @opts:	TCP options
  */
-struct tcp6_l2_buf_t {
-#ifdef __AVX2__
-	uint8_t pad[14];	/* 0	align ip6h to 32 bytes */
-#else
-	uint8_t pad[2];		/*	align ip6h to 4 bytes	0 */
-#endif
-	struct tap_hdr taph;	/* 14				2 */
-	struct ipv6hdr ip6h;	/* 32				20 */
-	struct tcphdr th;	/* 72				60 */
-	uint8_t data[MSS6];	/* 92				80 */
-				/* 65536			65532 */
+struct tcp_flags_t {
+	struct tcphdr th;
+	char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
 #ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
+} __attribute__ ((packed, aligned(32)));
 #else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
+} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
-tcp6_l2_buf[TCP_FRAMES_MEM];
 
-static struct tcp_buf_seq_update tcp6_l2_buf_seq_update[TCP_FRAMES_MEM];
+/* Ethernet header for IPv4 frames */
+static struct ethhdr		tcp4_eth_src;
+
+static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
+/* IPv4 headers */
+static struct iphdr		tcp4_payload_ip[TCP_FRAMES_MEM];
+/* TCP segments with payload for IPv4 frames */
+static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
+
+static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516");
+
+static struct tcp_buf_seq_update tcp4_seq_update[TCP_FRAMES_MEM];
+static unsigned int tcp4_payload_used;
+
+static struct tap_hdr		tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
+/* IPv4 headers for TCP segment without payload */
+static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
+/* TCP segments without payload for IPv4 frames */
+static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
+
+static unsigned int tcp4_flags_used;
+
+/* Ethernet header for IPv6 frames */
+static struct ethhdr		tcp6_eth_src;
+
+static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
+/* IPv6 headers */
+static struct ipv6hdr		tcp6_payload_ip[TCP_FRAMES_MEM];
+/* TCP headers and data for IPv6 frames */
+static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
+
+static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516");
+
+static struct tcp_buf_seq_update tcp6_seq_update[TCP_FRAMES_MEM];
+static unsigned int tcp6_payload_used;
 
-static unsigned int tcp6_l2_buf_used;
+static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
+/* IPv6 headers for TCP segment without payload */
+static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
+/* TCP segment without payload for IPv6 frames */
+static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
+
+static unsigned int tcp6_flags_used;
 
 /* recvmsg()/sendmsg() data for tap */
 static char 		tcp_buf_discard		[MAX_WINDOW];
 static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
 
-static struct iovec	tcp4_l2_iov		[TCP_FRAMES_MEM];
-static struct iovec	tcp6_l2_iov		[TCP_FRAMES_MEM];
-static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM];
-static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM];
+/**
+ * enum tcp_iov_parts - I/O vector parts for one TCP frame
+ * @TCP_IOV_TAP:	Tap backend specific header
+ * @TCP_IOV_ETH:	Ethernet header
+ * @TCP_IOV_IP:		IP (v4/v6) header
+ * @TCP_IOV_PAYLOAD:	IP payload (TCP header + data)
+ * @TCP_NUM_IOVS:	Number of entries in the iovec array of one frame
+ */
+enum tcp_iov_parts {
+	TCP_IOV_TAP	= 0,
+	TCP_IOV_ETH	= 1,
+	TCP_IOV_IP	= 2,
+	TCP_IOV_PAYLOAD	= 3,
+	TCP_NUM_IOVS
+};
+
+static struct iovec	tcp4_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec	tcp6_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 
 /* sendmsg() to socket */
 static struct iovec	tcp_iov			[UIO_MAXIOV];
 
-/**
- * tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
- * @pad:	Align TCP header to 32 bytes, for AVX2 checksum calculation only
- * @taph:	Tap-level headers (partially pre-filled)
- * @iph:	Pre-filled IP header (except for tot_len and saddr)
- * @th:		Headroom for TCP header
- * @opts:	Headroom for TCP options
- */
-static struct tcp4_l2_flags_buf_t {
-#ifdef __AVX2__
-	uint8_t pad[26];	/* 0, align th to 32 bytes */
-#else
-	uint8_t pad[2];		/*	align iph to 4 bytes	0 */
-#endif
-	struct tap_hdr taph;	/* 26				2 */
-	struct iphdr iph;	/* 44				20 */
-	struct tcphdr th;	/* 64				40 */
-	char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
-#endif
-tcp4_l2_flags_buf[TCP_FRAMES_MEM];
-
-static unsigned int tcp4_l2_flags_buf_used;
-
-/**
- * tcp6_l2_flags_buf_t - IPv6 packet buffers for segments without data (flags)
- * @pad:	Align IPv6 header for checksum calculation to 32B (AVX2) or 4B
- * @taph:	Tap-level headers (partially pre-filled)
- * @ip6h:	Pre-filled IP header (except for payload_len and addresses)
- * @th:		Headroom for TCP header
- * @opts:	Headroom for TCP options
- */
-static struct tcp6_l2_flags_buf_t {
-#ifdef __AVX2__
-	uint8_t pad[14];	/* 0	align ip6h to 32 bytes */
-#else
-	uint8_t pad[2];		/*	align ip6h to 4 bytes		   0 */
-#endif
-	struct tap_hdr taph;	/* 14					   2 */
-	struct ipv6hdr ip6h;	/* 32					  20 */
-	struct tcphdr th	/* 72 */ __attribute__ ((aligned(4))); /* 60 */
-	char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
-#endif
-tcp6_l2_flags_buf[TCP_FRAMES_MEM];
-
-static unsigned int tcp6_l2_flags_buf_used;
-
 #define CONN(idx)		(&(FLOW(idx)->tcp))
 
 /* Table for lookup from remote address, local port, remote port */
@@ -942,13 +892,13 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s)
  */
 static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th)
 {
-	uint16_t tlen = ntohs(iph->tot_len) - sizeof(struct iphdr);
+	uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr);
 	struct in_addr saddr = { .s_addr = iph->saddr };
 	struct in_addr daddr = { .s_addr = iph->daddr };
-	uint32_t sum = proto_ipv4_header_psum(tlen, IPPROTO_TCP, saddr, daddr);
+	uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
 
 	th->check = 0;
-	th->check = csum(th, tlen, sum);
+	th->check = csum(th, l4len, sum);
 }
 
 /**
@@ -958,34 +908,23 @@ static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th)
  */
 static void tcp_update_check_tcp6(struct ipv6hdr *ip6h, struct tcphdr *th)
 {
-	uint16_t payload_len = ntohs(ip6h->payload_len);
-	uint32_t sum = proto_ipv6_header_psum(payload_len, IPPROTO_TCP,
+	uint16_t l4len = ntohs(ip6h->payload_len);
+	uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
 					      &ip6h->saddr, &ip6h->daddr);
 
 	th->check = 0;
-	th->check = csum(th, payload_len, sum);
+	th->check = csum(th, l4len, sum);
 }
 
 /**
- * tcp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses
+ * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
  * @eth_d:	Ethernet destination address, NULL if unchanged
  * @eth_s:	Ethernet source address, NULL if unchanged
  */
 void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 {
-	int i;
-
-	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		struct tcp4_l2_flags_buf_t *b4f = &tcp4_l2_flags_buf[i];
-		struct tcp6_l2_flags_buf_t *b6f = &tcp6_l2_flags_buf[i];
-		struct tcp4_l2_buf_t *b4 = &tcp4_l2_buf[i];
-		struct tcp6_l2_buf_t *b6 = &tcp6_l2_buf[i];
-
-		eth_update_mac(&b4->taph.eh, eth_d, eth_s);
-		eth_update_mac(&b6->taph.eh, eth_d, eth_s);
-		eth_update_mac(&b4f->taph.eh, eth_d, eth_s);
-		eth_update_mac(&b6f->taph.eh, eth_d, eth_s);
-	}
+	eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
+	eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
 }
 
 /**
@@ -998,26 +937,38 @@ static void tcp_sock4_iov_init(const struct ctx *c)
 	struct iovec *iov;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
-		tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
-			.taph = TAP_HDR_INIT(ETH_P_IP),
-			.iph = iph,
-			.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
-		};
+	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+
+	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
+		tcp4_payload_ip[i] = iph;
+		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp4_payload[i].th.ack = 1;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(tcp4_l2_flags_buf); i++) {
-		tcp4_l2_flags_buf[i] = (struct tcp4_l2_flags_buf_t) {
-			.taph = TAP_HDR_INIT(ETH_P_IP),
-			.iph = L2_BUF_IP4_INIT(IPPROTO_TCP)
-		};
+	for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
+		tcp4_flags_ip[i] = iph;
+		tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp4_flags[i].th.ack = 1;
+	}
+
+	for (i = 0; i < TCP_FRAMES_MEM; i++) {
+		iov = tcp4_l2_iov[i];
+
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
+		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
 	}
 
-	for (i = 0, iov = tcp4_l2_iov; i < TCP_FRAMES_MEM; i++, iov++)
-		iov->iov_base = tap_frame_base(c, &tcp4_l2_buf[i].taph);
+	for (i = 0; i < TCP_FRAMES_MEM; i++) {
+		iov = tcp4_l2_flags_iov[i];
 
-	for (i = 0, iov = tcp4_l2_flags_iov; i < TCP_FRAMES_MEM; i++, iov++)
-		iov->iov_base = tap_frame_base(c, &tcp4_l2_flags_buf[i].taph);
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
+		/* One Ethernet header (tcp4_eth_src) is shared by all frames */
+		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
+	}
 }
 
 /**
@@ -1026,29 +977,41 @@ static void tcp_sock4_iov_init(const struct ctx *c)
  */
 static void tcp_sock6_iov_init(const struct ctx *c)
 {
+	struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
 	struct iovec *iov;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(tcp6_l2_buf); i++) {
-		tcp6_l2_buf[i] = (struct tcp6_l2_buf_t) {
-			.taph = TAP_HDR_INIT(ETH_P_IPV6),
-			.ip6h = L2_BUF_IP6_INIT(IPPROTO_TCP),
-			.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
-		};
+	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
+
+	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
+		tcp6_payload_ip[i] = ip6;
+		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp6_payload[i].th.ack = 1;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(tcp6_l2_flags_buf); i++) {
-		tcp6_l2_flags_buf[i] = (struct tcp6_l2_flags_buf_t) {
-			.taph = TAP_HDR_INIT(ETH_P_IPV6),
-			.ip6h = L2_BUF_IP6_INIT(IPPROTO_TCP)
-		};
+	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
+		tcp6_flags_ip[i] = ip6;
+		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp6_flags[i].th.ack = 1;
+	}
+
+	for (i = 0; i < TCP_FRAMES_MEM; i++) {
+		iov = tcp6_l2_iov[i];
+
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
+		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
 	}
 
-	for (i = 0, iov = tcp6_l2_iov; i < TCP_FRAMES_MEM; i++, iov++)
-		iov->iov_base = tap_frame_base(c, &tcp6_l2_buf[i].taph);
+	for (i = 0; i < TCP_FRAMES_MEM; i++) {
+		iov = tcp6_l2_flags_iov[i];
 
-	for (i = 0, iov = tcp6_l2_flags_iov; i < TCP_FRAMES_MEM; i++, iov++)
-		iov->iov_base = tap_frame_base(c, &tcp6_l2_flags_buf[i].taph);
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
+		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
+	}
 }
 
 /**
@@ -1284,36 +1247,40 @@ static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
 	} while (0)
 
 /**
- * tcp_l2_flags_buf_flush() - Send out buffers for segments with no data (flags)
+ * tcp_flags_flush() - Send out buffers for segments with no data (flags)
  * @c:		Execution context
  */
-static void tcp_l2_flags_buf_flush(const struct ctx *c)
+static void tcp_flags_flush(const struct ctx *c)
 {
-	tap_send_frames(c, tcp6_l2_flags_iov, 1, tcp6_l2_flags_buf_used);
-	tcp6_l2_flags_buf_used = 0;
+	tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS,
+			tcp6_flags_used);
+	tcp6_flags_used = 0;
 
-	tap_send_frames(c, tcp4_l2_flags_iov, 1, tcp4_l2_flags_buf_used);
-	tcp4_l2_flags_buf_used = 0;
+	tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
+			tcp4_flags_used);
+	tcp4_flags_used = 0;
 }
 
 /**
- * tcp_l2_data_buf_flush() - Send out buffers for segments with data
+ * tcp_payload_flush() - Send out buffers for segments with data
  * @c:		Execution context
  */
-static void tcp_l2_data_buf_flush(const struct ctx *c)
+static void tcp_payload_flush(const struct ctx *c)
 {
 	unsigned i;
 	size_t m;
 
-	m = tap_send_frames(c, tcp6_l2_iov, 1, tcp6_l2_buf_used);
+	m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
+			    tcp6_payload_used);
 	for (i = 0; i < m; i++)
-		*tcp6_l2_buf_seq_update[i].seq += tcp6_l2_buf_seq_update[i].len;
-	tcp6_l2_buf_used = 0;
+		*tcp6_seq_update[i].seq += tcp6_seq_update[i].len;
+	tcp6_payload_used = 0;
 
-	m = tap_send_frames(c, tcp4_l2_iov, 1, tcp4_l2_buf_used);
+	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
+			    tcp4_payload_used);
 	for (i = 0; i < m; i++)
-		*tcp4_l2_buf_seq_update[i].seq += tcp4_l2_buf_seq_update[i].len;
-	tcp4_l2_buf_used = 0;
+		*tcp4_seq_update[i].seq += tcp4_seq_update[i].len;
+	tcp4_payload_used = 0;
 }
 
 /**
@@ -1323,8 +1290,8 @@ static void tcp_l2_data_buf_flush(const struct ctx *c)
 /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
 void tcp_defer_handler(struct ctx *c)
 {
-	tcp_l2_flags_buf_flush(c);
-	tcp_l2_data_buf_flush(c);
+	tcp_flags_flush(c);
+	tcp_payload_flush(c);
 }
 
 /**
@@ -1354,60 +1321,67 @@ static void tcp_fill_header(struct tcphdr *th,
  * tcp_fill_headers4() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers
  * @c:		Execution context
  * @conn:	Connection pointer
+ * @taph:	tap backend specific header
  * @iph:	Pointer to IPv4 header
  * @th:		Pointer to TCP header
- * @plen:	Payload length (including TCP header options)
+ * @dlen:	TCP payload length
  * @check:	Checksum, if already known
  * @seq:	Sequence number for this segment
  *
- * Return: The total length of the IPv4 packet, host order
+ * Return: The IPv4 payload length, host order
  */
 static size_t tcp_fill_headers4(const struct ctx *c,
 				const struct tcp_tap_conn *conn,
+				struct tap_hdr *taph,
 				struct iphdr *iph, struct tcphdr *th,
-				size_t plen, const uint16_t *check,
+				size_t dlen, const uint16_t *check,
 				uint32_t seq)
 {
-	size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
 	const struct in_addr *a4 = inany_v4(&conn->faddr);
+	size_t l4len = dlen + sizeof(*th);
+	size_t l3len = l4len + sizeof(*iph);
 
 	ASSERT(a4);
 
-	iph->tot_len = htons(ip_len);
+	iph->tot_len = htons(l3len);
 	iph->saddr = a4->s_addr;
 	iph->daddr = c->ip4.addr_seen.s_addr;
 
 	iph->check = check ? *check :
-			     csum_ip4_header(iph->tot_len, IPPROTO_TCP,
+			     csum_ip4_header(l3len, IPPROTO_TCP,
 					     *a4, c->ip4.addr_seen);
 
 	tcp_fill_header(th, conn, seq);
 
 	tcp_update_check_tcp4(iph, th);
 
-	return ip_len;
+	tap_hdr_update(taph, l3len + sizeof(struct ethhdr));
+
+	return l4len;
 }
 
 /**
  * tcp_fill_headers6() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers
  * @c:		Execution context
  * @conn:	Connection pointer
+ * @taph:	tap backend specific header
  * @ip6h:	Pointer to IPv6 header
  * @th:		Pointer to TCP header
- * @plen:	Payload length (including TCP header options)
+ * @dlen:	TCP payload length
  * @check:	Checksum, if already known
  * @seq:	Sequence number for this segment
  *
- * Return: The total length of the IPv6 packet, host order
+ * Return: The IPv6 payload length, host order
  */
 static size_t tcp_fill_headers6(const struct ctx *c,
 				const struct tcp_tap_conn *conn,
+				struct tap_hdr *taph,
 				struct ipv6hdr *ip6h, struct tcphdr *th,
-				size_t plen, uint32_t seq)
+				size_t dlen, uint32_t seq)
 {
-	size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+	size_t l4len = dlen + sizeof(*th);
 
-	ip6h->payload_len = htons(plen + sizeof(struct tcphdr));
+	ip6h->payload_len = htons(l4len);
 	ip6h->saddr = conn->faddr.a6;
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr))
 		ip6h->daddr = c->ip6.addr_ll_seen;
@@ -1426,45 +1400,40 @@ static size_t tcp_fill_headers6(const struct ctx *c,
 
 	tcp_update_check_tcp6(ip6h, th);
 
-	return ip_len;
+	tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));
+
+	return l4len;
 }
 
 /**
  * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
  * @c:		Execution context
  * @conn:	Connection pointer
- * @p:		Pointer to any type of TCP pre-cooked buffer
- * @plen:	Payload length (including TCP header options)
+ * @iov:	Pointer to an array of iovec of TCP pre-cooked buffers
+ * @dlen:	TCP payload length
  * @check:	Checksum, if already known
  * @seq:	Sequence number for this segment
  *
- * Return: frame length including L2 headers, host order
+ * Return: IP payload length, host order
  */
 static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
 				      const struct tcp_tap_conn *conn,
-				      void *p, size_t plen,
+				      struct iovec *iov, size_t dlen,
 				      const uint16_t *check, uint32_t seq)
 {
 	const struct in_addr *a4 = inany_v4(&conn->faddr);
-	size_t ip_len, tlen;
 
 	if (a4) {
-		struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p;
-
-		ip_len = tcp_fill_headers4(c, conn, &b->iph, &b->th, plen,
-					   check, seq);
-
-		tlen = tap_frame_len(c, &b->taph, ip_len);
-	} else {
-		struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p;
-
-		ip_len = tcp_fill_headers6(c, conn, &b->ip6h, &b->th, plen,
-					   seq);
-
-		tlen = tap_frame_len(c, &b->taph, ip_len);
+		return tcp_fill_headers4(c, conn, iov[TCP_IOV_TAP].iov_base,
+					 iov[TCP_IOV_IP].iov_base,
+					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+					 check, seq);
 	}
 
-	return tlen;
+	return tcp_fill_headers6(c, conn, iov[TCP_IOV_TAP].iov_base,
+				 iov[TCP_IOV_IP].iov_base,
+				 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+				 seq);
 }
 
 /**
@@ -1593,18 +1562,15 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
  */
 static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 {
-	uint32_t prev_ack_to_tap = conn->seq_ack_to_tap;
-	uint32_t prev_wnd_to_tap = conn->wnd_to_tap;
-	struct tcp4_l2_flags_buf_t *b4 = NULL;
-	struct tcp6_l2_flags_buf_t *b6 = NULL;
+	struct tcp_flags_t *payload;
 	struct tcp_info tinfo = { 0 };
 	socklen_t sl = sizeof(tinfo);
 	int s = conn->sock;
 	size_t optlen = 0;
-	struct iovec *iov;
 	struct tcphdr *th;
+	struct iovec *iov;
+	size_t l4len;
 	char *data;
-	void *p;
 
 	if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) &&
 	    !flags && conn->wnd_to_tap)
@@ -1626,19 +1592,14 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
 		return 0;
 
-	if (CONN_V4(conn)) {
-		iov = tcp4_l2_flags_iov    + tcp4_l2_flags_buf_used;
-		p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++;
-		th = &b4->th;
+	if (CONN_V4(conn))
+		iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+	else
+		iov = tcp6_l2_flags_iov[tcp6_flags_used++];
 
-		/* gcc 11.2 would complain on data = (char *)(th + 1); */
-		data = b4->opts;
-	} else {
-		iov = tcp6_l2_flags_iov    + tcp6_l2_flags_buf_used;
-		p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++;
-		th = &b6->th;
-		data = b6->opts;
-	}
+	payload = iov[TCP_IOV_PAYLOAD].iov_base;
+	th = &payload->th;
+	data = payload->opts;
 
 	if (flags & SYN) {
 		int mss;
@@ -1675,9 +1636,7 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		*data++ = OPT_WS_LEN;
 		*data++ = conn->ws_to_tap;
 	} else if (!(flags & RST)) {
-		if (conn->seq_ack_to_tap != prev_ack_to_tap ||
-		    !prev_wnd_to_tap)
-			flags |= ACK;
+		flags |= ACK;
 	}
 
 	th->doff = (sizeof(*th) + optlen) / 4;
@@ -1687,8 +1646,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	th->syn = !!(flags & SYN);
 	th->fin = !!(flags & FIN);
 
-	iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen,
-					       NULL, conn->seq_to_tap);
+	l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL,
+					conn->seq_to_tap);
+	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 
 	if (th->ack) {
 		if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
@@ -1704,24 +1664,27 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	if (th->fin || th->syn)
 		conn->seq_to_tap++;
 
-	if (CONN_V4(conn)) {
-		if (flags & DUP_ACK) {
-			memcpy(b4 + 1, b4, sizeof(*b4));
-			(iov + 1)->iov_len = iov->iov_len;
-			tcp4_l2_flags_buf_used++;
-		}
+	if (flags & DUP_ACK) {
+		struct iovec *dup_iov;
+		int i;
 
-		if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2)
-			tcp_l2_flags_buf_flush(c);
-	} else {
-		if (flags & DUP_ACK) {
-			memcpy(b6 + 1, b6, sizeof(*b6));
-			(iov + 1)->iov_len = iov->iov_len;
-			tcp6_l2_flags_buf_used++;
-		}
+		if (CONN_V4(conn))
+			dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+		else
+			dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++];
 
-		if (tcp6_l2_flags_buf_used > ARRAY_SIZE(tcp6_l2_flags_buf) - 2)
-			tcp_l2_flags_buf_flush(c);
+		for (i = 0; i < TCP_NUM_IOVS; i++) /* ETH part aliases: memmove */
+			memmove(dup_iov[i].iov_base, iov[i].iov_base,
+				iov[i].iov_len);
+		dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
+	}
+
+	if (CONN_V4(conn)) {
+		if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
+			tcp_flags_flush(c);
+	} else {
+		if (tcp6_flags_used > TCP_FRAMES_MEM - 2)
+			tcp_flags_flush(c);
 	}
 
 	return 0;
@@ -2159,39 +2122,43 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
  * tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer
  * @c:		Execution context
  * @conn:	Connection pointer
- * @plen:	Payload length at L4
+ * @dlen:	TCP payload length
  * @no_csum:	Don't compute IPv4 checksum, use the one from previous buffer
  * @seq:	Sequence number to be sent
  */
 static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
-			    ssize_t plen, int no_csum, uint32_t seq)
+			    ssize_t dlen, int no_csum, uint32_t seq)
 {
 	uint32_t *seq_update = &conn->seq_to_tap;
 	struct iovec *iov;
+	size_t l4len;
 
 	if (CONN_V4(conn)) {
-		struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used];
-		const uint16_t *check = no_csum ? &(b - 1)->iph.check : NULL;
+		const uint16_t *check = NULL;
+		struct iphdr *iph;	/* IPv4 header of the previous frame */
 
-		tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update;
-		tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen;
+		if (no_csum) {	/* caller sets it only if tcp4_payload_used != 0 */
+			iph = tcp4_l2_iov[tcp4_payload_used - 1][TCP_IOV_IP].iov_base;
+			check = &iph->check;
+		}
 
-		iov = tcp4_l2_iov + tcp4_l2_buf_used++;
-		iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
-						       check, seq);
-		if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1)
-			tcp_l2_data_buf_flush(c);
-	} else if (CONN_V6(conn)) {
-		struct tcp6_l2_buf_t *b = &tcp6_l2_buf[tcp6_l2_buf_used];
+		tcp4_seq_update[tcp4_payload_used].seq = seq_update;
+		tcp4_seq_update[tcp4_payload_used].len = dlen;
 
-		tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update;
-		tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen;
+		iov = tcp4_l2_iov[tcp4_payload_used++];
+		l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, check, seq);
+		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+		if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
+			tcp_payload_flush(c);
+	} else if (CONN_V6(conn)) {
+		tcp6_seq_update[tcp6_payload_used].seq = seq_update;
+		tcp6_seq_update[tcp6_payload_used].len = dlen;
 
-		iov = tcp6_l2_iov + tcp6_l2_buf_used++;
-		iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
-						       NULL, seq);
-		if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1)
-			tcp_l2_data_buf_flush(c);
+		iov = tcp6_l2_iov[tcp6_payload_used++];
+		l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, NULL, seq);
+		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+		if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
+			tcp_payload_flush(c);
 	}
 }
 
@@ -2208,7 +2175,7 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 {
 	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
 	int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
-	int sendlen, len, plen, v4 = CONN_V4(conn);
+	int sendlen, len, dlen, v4 = CONN_V4(conn);
 	int s = conn->sock, i, ret = 0;
 	struct msghdr mh_sock = { 0 };
 	uint16_t mss = MSS_GET(conn);
@@ -2246,19 +2213,19 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 	iov_sock[0].iov_base = tcp_buf_discard;
 	iov_sock[0].iov_len = already_sent;
 
-	if (( v4 && tcp4_l2_buf_used + fill_bufs > ARRAY_SIZE(tcp4_l2_buf)) ||
-	    (!v4 && tcp6_l2_buf_used + fill_bufs > ARRAY_SIZE(tcp6_l2_buf))) {
-		tcp_l2_data_buf_flush(c);
+	if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) ||
+	    (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
+		tcp_payload_flush(c);
 
 		/* Silence Coverity CWE-125 false positive */
-		tcp4_l2_buf_used = tcp6_l2_buf_used = 0;
+		tcp4_payload_used = tcp6_payload_used = 0;
 	}
 
 	for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
 		if (v4)
-			iov->iov_base = &tcp4_l2_buf[tcp4_l2_buf_used + i].data;
+			iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
 		else
-			iov->iov_base = &tcp6_l2_buf[tcp6_l2_buf_used + i].data;
+			iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data;
 		iov->iov_len = mss;
 	}
 	if (iov_rem)
@@ -2300,16 +2267,16 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 	tcp_update_seqack_wnd(c, conn, 0, NULL);
 
 	/* Finally, queue to tap */
-	plen = mss;
+	dlen = mss;
 	seq = conn->seq_to_tap;
 	for (i = 0; i < send_bufs; i++) {
-		int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used;
+		int no_csum = i && i != send_bufs - 1 && tcp4_payload_used;
 
 		if (i == send_bufs - 1)
-			plen = last_len;
+			dlen = last_len;
 
-		tcp_data_to_tap(c, conn, plen, no_csum, seq);
-		seq += plen;
+		tcp_data_to_tap(c, conn, dlen, no_csum, seq);
+		seq += dlen;
 	}
 
 	conn_flag(c, conn, ACK_FROM_TAP_DUE);