about | git | code | bugs | lists | chat
path: root/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcp.c')
-rw-r--r-- tcp.c 581
1 file changed, 274 insertions, 307 deletions
diff --git a/tcp.c b/tcp.c
index b65ddeb..21d0af0 100644
--- a/tcp.c
+++ b/tcp.c
@@ -290,6 +290,7 @@
#include "checksum.h"
#include "util.h"
+#include "iov.h"
#include "ip.h"
#include "passt.h"
#include "tap.h"
@@ -318,39 +319,14 @@
/* MSS rounding: see SET_MSS() */
#define MSS_DEFAULT 536
-
-struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
-#ifdef __AVX2__
- uint8_t pad[26];
-#else
- uint8_t pad[2];
-#endif
- struct tap_hdr taph;
- struct iphdr iph;
- struct tcphdr th;
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
-struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
-#ifdef __AVX2__
- uint8_t pad[14];
-#else
- uint8_t pad[2];
-#endif
- struct tap_hdr taph;
- struct ipv6hdr ip6h;
- struct tcphdr th;
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
-#define MSS4 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp4_l2_head), 4)
-#define MSS6 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp6_l2_head), 4)
+#define MSS4 ROUND_DOWN(IP_MAX_MTU - \
+ sizeof(struct tcphdr) - \
+ sizeof(struct iphdr), \
+ sizeof(uint32_t))
+#define MSS6 ROUND_DOWN(IP_MAX_MTU - \
+ sizeof(struct tcphdr) - \
+ sizeof(struct ipv6hdr), \
+ sizeof(uint32_t))
#define WINDOW_DEFAULT 14600 /* RFC 6928 */
#ifdef HAS_SND_WND
@@ -445,133 +421,107 @@ struct tcp_buf_seq_update {
};
/* Static buffers */
-
/**
- * tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
- * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
- * @taph: Tap-level headers (partially pre-filled)
- * @iph: Pre-filled IP header (except for tot_len and saddr)
- * @uh: Headroom for TCP header
- * @data: Storage for TCP payload
+ * struct tcp_payload_t - TCP header and data to send segments with payload
+ * @th: TCP header
+ * @data: TCP data
*/
-static struct tcp4_l2_buf_t {
-#ifdef __AVX2__
- uint8_t pad[26]; /* 0, align th to 32 bytes */
-#else
- uint8_t pad[2]; /* align iph to 4 bytes 0 */
-#endif
- struct tap_hdr taph; /* 26 2 */
- struct iphdr iph; /* 44 20 */
- struct tcphdr th; /* 64 40 */
- uint8_t data[MSS4]; /* 84 60 */
- /* 65536 65532 */
+struct tcp_payload_t {
+ struct tcphdr th;
+ uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)];
#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
+} __attribute__ ((packed, aligned(32))); /* For AVX2 checksum routines */
#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
+} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-tcp4_l2_buf[TCP_FRAMES_MEM];
-
-static struct tcp_buf_seq_update tcp4_l2_buf_seq_update[TCP_FRAMES_MEM];
-
-static unsigned int tcp4_l2_buf_used;
/**
- * tcp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections
- * @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B
- * @taph: Tap-level headers (partially pre-filled)
- * @ip6h: Pre-filled IP header (except for payload_len and addresses)
- * @th: Headroom for TCP header
- * @data: Storage for TCP payload
+ * struct tcp_flags_t - TCP header and options to send zero-length
+ * segments (flags)
+ * @th: TCP header
+ * @opts: TCP options
*/
-struct tcp6_l2_buf_t {
-#ifdef __AVX2__
- uint8_t pad[14]; /* 0 align ip6h to 32 bytes */
-#else
- uint8_t pad[2]; /* align ip6h to 4 bytes 0 */
-#endif
- struct tap_hdr taph; /* 14 2 */
- struct ipv6hdr ip6h; /* 32 20 */
- struct tcphdr th; /* 72 60 */
- uint8_t data[MSS6]; /* 92 80 */
- /* 65536 65532 */
+struct tcp_flags_t {
+ struct tcphdr th;
+ char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
+} __attribute__ ((packed, aligned(32)));
#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
+} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-tcp6_l2_buf[TCP_FRAMES_MEM];
-static struct tcp_buf_seq_update tcp6_l2_buf_seq_update[TCP_FRAMES_MEM];
+/* Ethernet header for IPv4 frames */
+static struct ethhdr tcp4_eth_src;
+
+static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
+/* IPv4 headers */
+static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM];
+/* TCP segments with payload for IPv4 frames */
+static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM];
+
+static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516");
+
+static struct tcp_buf_seq_update tcp4_seq_update[TCP_FRAMES_MEM];
+static unsigned int tcp4_payload_used;
+
+static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
+/* IPv4 headers for TCP segment without payload */
+static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
+/* TCP segments without payload for IPv4 frames */
+static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM];
+
+static unsigned int tcp4_flags_used;
+
+/* Ethernet header for IPv6 frames */
+static struct ethhdr tcp6_eth_src;
+
+static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
+/* IPv6 headers */
+static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM];
+/* TCP segments with payload for IPv6 frames */
+static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM];
+
+static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516");
+
+static struct tcp_buf_seq_update tcp6_seq_update[TCP_FRAMES_MEM];
+static unsigned int tcp6_payload_used;
-static unsigned int tcp6_l2_buf_used;
+static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
+/* IPv6 headers for TCP segment without payload */
+static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
+/* TCP segments without payload for IPv6 frames */
+static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM];
+
+static unsigned int tcp6_flags_used;
/* recvmsg()/sendmsg() data for tap */
static char tcp_buf_discard [MAX_WINDOW];
static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
-static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM];
-static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM];
-static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM];
-static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM];
+/**
+ * enum tcp_iov_parts - I/O vector parts for one TCP frame
+ * @TCP_IOV_TAP: tap backend specific header
+ * @TCP_IOV_ETH: Ethernet header
+ * @TCP_IOV_IP: IP (v4/v6) header
+ * @TCP_IOV_PAYLOAD: IP payload (TCP header + data)
+ * @TCP_NUM_IOVS: the number of entries in the iovec array
+ */
+enum tcp_iov_parts {
+ TCP_IOV_TAP = 0,
+ TCP_IOV_ETH = 1,
+ TCP_IOV_IP = 2,
+ TCP_IOV_PAYLOAD = 3,
+ TCP_NUM_IOVS
+};
+
+static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
/* sendmsg() to socket */
static struct iovec tcp_iov [UIO_MAXIOV];
-/**
- * tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
- * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
- * @taph: Tap-level headers (partially pre-filled)
- * @iph: Pre-filled IP header (except for tot_len and saddr)
- * @th: Headroom for TCP header
- * @opts: Headroom for TCP options
- */
-static struct tcp4_l2_flags_buf_t {
-#ifdef __AVX2__
- uint8_t pad[26]; /* 0, align th to 32 bytes */
-#else
- uint8_t pad[2]; /* align iph to 4 bytes 0 */
-#endif
- struct tap_hdr taph; /* 26 2 */
- struct iphdr iph; /* 44 20 */
- struct tcphdr th; /* 64 40 */
- char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
-#endif
-tcp4_l2_flags_buf[TCP_FRAMES_MEM];
-
-static unsigned int tcp4_l2_flags_buf_used;
-
-/**
- * tcp6_l2_flags_buf_t - IPv6 packet buffers for segments without data (flags)
- * @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B
- * @taph: Tap-level headers (partially pre-filled)
- * @ip6h: Pre-filled IP header (except for payload_len and addresses)
- * @th: Headroom for TCP header
- * @opts: Headroom for TCP options
- */
-static struct tcp6_l2_flags_buf_t {
-#ifdef __AVX2__
- uint8_t pad[14]; /* 0 align ip6h to 32 bytes */
-#else
- uint8_t pad[2]; /* align ip6h to 4 bytes 0 */
-#endif
- struct tap_hdr taph; /* 14 2 */
- struct ipv6hdr ip6h; /* 32 20 */
- struct tcphdr th /* 72 */ __attribute__ ((aligned(4))); /* 60 */
- char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)))
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
-#endif
-tcp6_l2_flags_buf[TCP_FRAMES_MEM];
-
-static unsigned int tcp6_l2_flags_buf_used;
-
#define CONN(idx) (&(FLOW(idx)->tcp))
/* Table for lookup from remote address, local port, remote port */
@@ -942,13 +892,13 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s)
*/
static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th)
{
- uint16_t tlen = ntohs(iph->tot_len) - sizeof(struct iphdr);
+ uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr);
struct in_addr saddr = { .s_addr = iph->saddr };
struct in_addr daddr = { .s_addr = iph->daddr };
- uint32_t sum = proto_ipv4_header_psum(tlen, IPPROTO_TCP, saddr, daddr);
+ uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
th->check = 0;
- th->check = csum(th, tlen, sum);
+ th->check = csum(th, l4len, sum);
}
/**
@@ -958,34 +908,23 @@ static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th)
*/
static void tcp_update_check_tcp6(struct ipv6hdr *ip6h, struct tcphdr *th)
{
- uint16_t payload_len = ntohs(ip6h->payload_len);
- uint32_t sum = proto_ipv6_header_psum(payload_len, IPPROTO_TCP,
+ uint16_t l4len = ntohs(ip6h->payload_len);
+ uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
&ip6h->saddr, &ip6h->daddr);
th->check = 0;
- th->check = csum(th, payload_len, sum);
+ th->check = csum(th, l4len, sum);
}
/**
- * tcp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses
+ * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
* @eth_d: Ethernet destination address, NULL if unchanged
* @eth_s: Ethernet source address, NULL if unchanged
*/
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
{
- int i;
-
- for (i = 0; i < TCP_FRAMES_MEM; i++) {
- struct tcp4_l2_flags_buf_t *b4f = &tcp4_l2_flags_buf[i];
- struct tcp6_l2_flags_buf_t *b6f = &tcp6_l2_flags_buf[i];
- struct tcp4_l2_buf_t *b4 = &tcp4_l2_buf[i];
- struct tcp6_l2_buf_t *b6 = &tcp6_l2_buf[i];
-
- eth_update_mac(&b4->taph.eh, eth_d, eth_s);
- eth_update_mac(&b6->taph.eh, eth_d, eth_s);
- eth_update_mac(&b4f->taph.eh, eth_d, eth_s);
- eth_update_mac(&b6f->taph.eh, eth_d, eth_s);
- }
+ eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
+ eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
}
/**
@@ -998,26 +937,38 @@ static void tcp_sock4_iov_init(const struct ctx *c)
struct iovec *iov;
int i;
- for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
- tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
- .taph = TAP_HDR_INIT(ETH_P_IP),
- .iph = iph,
- .th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
- };
+ tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+
+ for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
+ tcp4_payload_ip[i] = iph;
+ tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
+ tcp4_payload[i].th.ack = 1;
}
- for (i = 0; i < ARRAY_SIZE(tcp4_l2_flags_buf); i++) {
- tcp4_l2_flags_buf[i] = (struct tcp4_l2_flags_buf_t) {
- .taph = TAP_HDR_INIT(ETH_P_IP),
- .iph = L2_BUF_IP4_INIT(IPPROTO_TCP)
- };
+ for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
+ tcp4_flags_ip[i] = iph;
+ tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
+ tcp4_flags[i].th.ack = 1;
+ }
+
+ for (i = 0; i < TCP_FRAMES_MEM; i++) {
+ iov = tcp4_l2_iov[i];
+
+ iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
+ iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
+ iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
}
- for (i = 0, iov = tcp4_l2_iov; i < TCP_FRAMES_MEM; i++, iov++)
- iov->iov_base = tap_frame_base(c, &tcp4_l2_buf[i].taph);
+ for (i = 0; i < TCP_FRAMES_MEM; i++) {
+ iov = tcp4_l2_flags_iov[i];
- for (i = 0, iov = tcp4_l2_flags_iov; i < TCP_FRAMES_MEM; i++, iov++)
- iov->iov_base = tap_frame_base(c, &tcp4_l2_flags_buf[i].taph);
+ iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
+ iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
+ iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
+ iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
+ }
}
/**
@@ -1026,29 +977,41 @@ static void tcp_sock4_iov_init(const struct ctx *c)
*/
static void tcp_sock6_iov_init(const struct ctx *c)
{
+ struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
struct iovec *iov;
int i;
- for (i = 0; i < ARRAY_SIZE(tcp6_l2_buf); i++) {
- tcp6_l2_buf[i] = (struct tcp6_l2_buf_t) {
- .taph = TAP_HDR_INIT(ETH_P_IPV6),
- .ip6h = L2_BUF_IP6_INIT(IPPROTO_TCP),
- .th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
- };
+ tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
+
+ for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
+ tcp6_payload_ip[i] = ip6;
+ tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
+ tcp6_payload[i].th.ack = 1;
}
- for (i = 0; i < ARRAY_SIZE(tcp6_l2_flags_buf); i++) {
- tcp6_l2_flags_buf[i] = (struct tcp6_l2_flags_buf_t) {
- .taph = TAP_HDR_INIT(ETH_P_IPV6),
- .ip6h = L2_BUF_IP6_INIT(IPPROTO_TCP)
- };
+ for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
+ tcp6_flags_ip[i] = ip6;
+ tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
+ tcp6_flags[i].th .ack = 1;
+ }
+
+ for (i = 0; i < TCP_FRAMES_MEM; i++) {
+ iov = tcp6_l2_iov[i];
+
+ iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
+ iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
+ iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
}
- for (i = 0, iov = tcp6_l2_iov; i < TCP_FRAMES_MEM; i++, iov++)
- iov->iov_base = tap_frame_base(c, &tcp6_l2_buf[i].taph);
+ for (i = 0; i < TCP_FRAMES_MEM; i++) {
+ iov = tcp6_l2_flags_iov[i];
- for (i = 0, iov = tcp6_l2_flags_iov; i < TCP_FRAMES_MEM; i++, iov++)
- iov->iov_base = tap_frame_base(c, &tcp6_l2_flags_buf[i].taph);
+ iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
+ iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
+ iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
+ }
}
/**
@@ -1284,36 +1247,40 @@ static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
} while (0)
/**
- * tcp_l2_flags_buf_flush() - Send out buffers for segments with no data (flags)
+ * tcp_flags_flush() - Send out buffers for segments with no data (flags)
* @c: Execution context
*/
-static void tcp_l2_flags_buf_flush(const struct ctx *c)
+static void tcp_flags_flush(const struct ctx *c)
{
- tap_send_frames(c, tcp6_l2_flags_iov, 1, tcp6_l2_flags_buf_used);
- tcp6_l2_flags_buf_used = 0;
+ tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS,
+ tcp6_flags_used);
+ tcp6_flags_used = 0;
- tap_send_frames(c, tcp4_l2_flags_iov, 1, tcp4_l2_flags_buf_used);
- tcp4_l2_flags_buf_used = 0;
+ tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
+ tcp4_flags_used);
+ tcp4_flags_used = 0;
}
/**
- * tcp_l2_data_buf_flush() - Send out buffers for segments with data
+ * tcp_payload_flush() - Send out buffers for segments with data
* @c: Execution context
*/
-static void tcp_l2_data_buf_flush(const struct ctx *c)
+static void tcp_payload_flush(const struct ctx *c)
{
unsigned i;
size_t m;
- m = tap_send_frames(c, tcp6_l2_iov, 1, tcp6_l2_buf_used);
+ m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
+ tcp6_payload_used);
for (i = 0; i < m; i++)
- *tcp6_l2_buf_seq_update[i].seq += tcp6_l2_buf_seq_update[i].len;
- tcp6_l2_buf_used = 0;
+ *tcp6_seq_update[i].seq += tcp6_seq_update[i].len;
+ tcp6_payload_used = 0;
- m = tap_send_frames(c, tcp4_l2_iov, 1, tcp4_l2_buf_used);
+ m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
+ tcp4_payload_used);
for (i = 0; i < m; i++)
- *tcp4_l2_buf_seq_update[i].seq += tcp4_l2_buf_seq_update[i].len;
- tcp4_l2_buf_used = 0;
+ *tcp4_seq_update[i].seq += tcp4_seq_update[i].len;
+ tcp4_payload_used = 0;
}
/**
@@ -1323,8 +1290,8 @@ static void tcp_l2_data_buf_flush(const struct ctx *c)
/* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
void tcp_defer_handler(struct ctx *c)
{
- tcp_l2_flags_buf_flush(c);
- tcp_l2_data_buf_flush(c);
+ tcp_flags_flush(c);
+ tcp_payload_flush(c);
}
/**
@@ -1354,60 +1321,67 @@ static void tcp_fill_header(struct tcphdr *th,
* tcp_fill_headers4() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers
* @c: Execution context
* @conn: Connection pointer
+ * @taph: tap backend specific header
* @iph: Pointer to IPv4 header
* @th: Pointer to TCP header
- * @plen: Payload length (including TCP header options)
+ * @dlen: TCP payload length
* @check: Checksum, if already known
* @seq: Sequence number for this segment
*
- * Return: The total length of the IPv4 packet, host order
+ * Return: The IPv4 payload length, host order
*/
static size_t tcp_fill_headers4(const struct ctx *c,
const struct tcp_tap_conn *conn,
+ struct tap_hdr *taph,
struct iphdr *iph, struct tcphdr *th,
- size_t plen, const uint16_t *check,
+ size_t dlen, const uint16_t *check,
uint32_t seq)
{
- size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
const struct in_addr *a4 = inany_v4(&conn->faddr);
+ size_t l4len = dlen + sizeof(*th);
+ size_t l3len = l4len + sizeof(*iph);
ASSERT(a4);
- iph->tot_len = htons(ip_len);
+ iph->tot_len = htons(l3len);
iph->saddr = a4->s_addr;
iph->daddr = c->ip4.addr_seen.s_addr;
iph->check = check ? *check :
- csum_ip4_header(iph->tot_len, IPPROTO_TCP,
+ csum_ip4_header(l3len, IPPROTO_TCP,
*a4, c->ip4.addr_seen);
tcp_fill_header(th, conn, seq);
tcp_update_check_tcp4(iph, th);
- return ip_len;
+ tap_hdr_update(taph, l3len + sizeof(struct ethhdr));
+
+ return l4len;
}
/**
* tcp_fill_headers6() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers
* @c: Execution context
* @conn: Connection pointer
+ * @taph: tap backend specific header
* @ip6h: Pointer to IPv6 header
* @th: Pointer to TCP header
- * @plen: Payload length (including TCP header options)
+ * @dlen: TCP payload length
* @check: Checksum, if already known
* @seq: Sequence number for this segment
*
- * Return: The total length of the IPv6 packet, host order
+ * Return: The IPv6 payload length, host order
*/
static size_t tcp_fill_headers6(const struct ctx *c,
const struct tcp_tap_conn *conn,
+ struct tap_hdr *taph,
struct ipv6hdr *ip6h, struct tcphdr *th,
- size_t plen, uint32_t seq)
+ size_t dlen, uint32_t seq)
{
- size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ size_t l4len = dlen + sizeof(*th);
- ip6h->payload_len = htons(plen + sizeof(struct tcphdr));
+ ip6h->payload_len = htons(l4len);
ip6h->saddr = conn->faddr.a6;
if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr))
ip6h->daddr = c->ip6.addr_ll_seen;
@@ -1426,45 +1400,40 @@ static size_t tcp_fill_headers6(const struct ctx *c,
tcp_update_check_tcp6(ip6h, th);
- return ip_len;
+ tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));
+
+ return l4len;
}
/**
* tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
* @c: Execution context
* @conn: Connection pointer
- * @p: Pointer to any type of TCP pre-cooked buffer
- * @plen: Payload length (including TCP header options)
+ * @iov: Pointer to an array of iovec of TCP pre-cooked buffers
+ * @dlen: TCP payload length
* @check: Checksum, if already known
* @seq: Sequence number for this segment
*
- * Return: frame length including L2 headers, host order
+ * Return: IP payload length, host order
*/
static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
const struct tcp_tap_conn *conn,
- void *p, size_t plen,
+ struct iovec *iov, size_t dlen,
const uint16_t *check, uint32_t seq)
{
const struct in_addr *a4 = inany_v4(&conn->faddr);
- size_t ip_len, tlen;
if (a4) {
- struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p;
-
- ip_len = tcp_fill_headers4(c, conn, &b->iph, &b->th, plen,
- check, seq);
-
- tlen = tap_frame_len(c, &b->taph, ip_len);
- } else {
- struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p;
-
- ip_len = tcp_fill_headers6(c, conn, &b->ip6h, &b->th, plen,
- seq);
-
- tlen = tap_frame_len(c, &b->taph, ip_len);
+ return tcp_fill_headers4(c, conn, iov[TCP_IOV_TAP].iov_base,
+ iov[TCP_IOV_IP].iov_base,
+ iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+ check, seq);
}
- return tlen;
+ return tcp_fill_headers6(c, conn, iov[TCP_IOV_TAP].iov_base,
+ iov[TCP_IOV_IP].iov_base,
+ iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+ seq);
}
/**
@@ -1593,18 +1562,15 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
*/
static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
- uint32_t prev_ack_to_tap = conn->seq_ack_to_tap;
- uint32_t prev_wnd_to_tap = conn->wnd_to_tap;
- struct tcp4_l2_flags_buf_t *b4 = NULL;
- struct tcp6_l2_flags_buf_t *b6 = NULL;
+ struct tcp_flags_t *payload;
struct tcp_info tinfo = { 0 };
socklen_t sl = sizeof(tinfo);
int s = conn->sock;
size_t optlen = 0;
- struct iovec *iov;
struct tcphdr *th;
+ struct iovec *iov;
+ size_t l4len;
char *data;
- void *p;
if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) &&
!flags && conn->wnd_to_tap)
@@ -1626,19 +1592,14 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
return 0;
- if (CONN_V4(conn)) {
- iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used;
- p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++;
- th = &b4->th;
+ if (CONN_V4(conn))
+ iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+ else
+ iov = tcp6_l2_flags_iov[tcp6_flags_used++];
- /* gcc 11.2 would complain on data = (char *)(th + 1); */
- data = b4->opts;
- } else {
- iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used;
- p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++;
- th = &b6->th;
- data = b6->opts;
- }
+ payload = iov[TCP_IOV_PAYLOAD].iov_base;
+ th = &payload->th;
+ data = payload->opts;
if (flags & SYN) {
int mss;
@@ -1675,9 +1636,7 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
*data++ = OPT_WS_LEN;
*data++ = conn->ws_to_tap;
} else if (!(flags & RST)) {
- if (conn->seq_ack_to_tap != prev_ack_to_tap ||
- !prev_wnd_to_tap)
- flags |= ACK;
+ flags |= ACK;
}
th->doff = (sizeof(*th) + optlen) / 4;
@@ -1687,8 +1646,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
th->syn = !!(flags & SYN);
th->fin = !!(flags & FIN);
- iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen,
- NULL, conn->seq_to_tap);
+ l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL,
+ conn->seq_to_tap);
+ iov[TCP_IOV_PAYLOAD].iov_len = l4len;
if (th->ack) {
if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
@@ -1704,24 +1664,27 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (th->fin || th->syn)
conn->seq_to_tap++;
- if (CONN_V4(conn)) {
- if (flags & DUP_ACK) {
- memcpy(b4 + 1, b4, sizeof(*b4));
- (iov + 1)->iov_len = iov->iov_len;
- tcp4_l2_flags_buf_used++;
- }
+ if (flags & DUP_ACK) {
+ struct iovec *dup_iov;
+ int i;
- if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2)
- tcp_l2_flags_buf_flush(c);
- } else {
- if (flags & DUP_ACK) {
- memcpy(b6 + 1, b6, sizeof(*b6));
- (iov + 1)->iov_len = iov->iov_len;
- tcp6_l2_flags_buf_used++;
- }
+ if (CONN_V4(conn))
+ dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+ else
+ dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++];
- if (tcp6_l2_flags_buf_used > ARRAY_SIZE(tcp6_l2_flags_buf) - 2)
- tcp_l2_flags_buf_flush(c);
+ for (i = 0; i < TCP_NUM_IOVS; i++)
+ memcpy(dup_iov[i].iov_base, iov[i].iov_base,
+ iov[i].iov_len);
+ dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
+ }
+
+ if (CONN_V4(conn)) {
+ if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
+ tcp_flags_flush(c);
+ } else {
+ if (tcp6_flags_used > TCP_FRAMES_MEM - 2)
+ tcp_flags_flush(c);
}
return 0;
@@ -2159,39 +2122,43 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
* tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer
* @c: Execution context
* @conn: Connection pointer
- * @plen: Payload length at L4
+ * @dlen: TCP payload length
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
* @seq: Sequence number to be sent
*/
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
- ssize_t plen, int no_csum, uint32_t seq)
+ ssize_t dlen, int no_csum, uint32_t seq)
{
uint32_t *seq_update = &conn->seq_to_tap;
struct iovec *iov;
+ size_t l4len;
if (CONN_V4(conn)) {
- struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used];
- const uint16_t *check = no_csum ? &(b - 1)->iph.check : NULL;
+ struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
+ const uint16_t *check = NULL;
- tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update;
- tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen;
+ if (no_csum) {
+ struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
+ check = &iph->check;
+ }
- iov = tcp4_l2_iov + tcp4_l2_buf_used++;
- iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
- check, seq);
- if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1)
- tcp_l2_data_buf_flush(c);
- } else if (CONN_V6(conn)) {
- struct tcp6_l2_buf_t *b = &tcp6_l2_buf[tcp6_l2_buf_used];
+ tcp4_seq_update[tcp4_payload_used].seq = seq_update;
+ tcp4_seq_update[tcp4_payload_used].len = dlen;
- tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update;
- tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen;
+ iov = tcp4_l2_iov[tcp4_payload_used++];
+ l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, check, seq);
+ iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+ if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
+ tcp_payload_flush(c);
+ } else if (CONN_V6(conn)) {
+ tcp6_seq_update[tcp6_payload_used].seq = seq_update;
+ tcp6_seq_update[tcp6_payload_used].len = dlen;
- iov = tcp6_l2_iov + tcp6_l2_buf_used++;
- iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
- NULL, seq);
- if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1)
- tcp_l2_data_buf_flush(c);
+ iov = tcp6_l2_iov[tcp6_payload_used++];
+ l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, NULL, seq);
+ iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+ if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
+ tcp_payload_flush(c);
}
}
@@ -2208,7 +2175,7 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
{
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
- int sendlen, len, plen, v4 = CONN_V4(conn);
+ int sendlen, len, dlen, v4 = CONN_V4(conn);
int s = conn->sock, i, ret = 0;
struct msghdr mh_sock = { 0 };
uint16_t mss = MSS_GET(conn);
@@ -2246,19 +2213,19 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
iov_sock[0].iov_base = tcp_buf_discard;
iov_sock[0].iov_len = already_sent;
- if (( v4 && tcp4_l2_buf_used + fill_bufs > ARRAY_SIZE(tcp4_l2_buf)) ||
- (!v4 && tcp6_l2_buf_used + fill_bufs > ARRAY_SIZE(tcp6_l2_buf))) {
- tcp_l2_data_buf_flush(c);
+ if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) ||
+ (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
+ tcp_payload_flush(c);
/* Silence Coverity CWE-125 false positive */
- tcp4_l2_buf_used = tcp6_l2_buf_used = 0;
+ tcp4_payload_used = tcp6_payload_used = 0;
}
for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
if (v4)
- iov->iov_base = &tcp4_l2_buf[tcp4_l2_buf_used + i].data;
+ iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
else
- iov->iov_base = &tcp6_l2_buf[tcp6_l2_buf_used + i].data;
+ iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data;
iov->iov_len = mss;
}
if (iov_rem)
@@ -2300,16 +2267,16 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
tcp_update_seqack_wnd(c, conn, 0, NULL);
/* Finally, queue to tap */
- plen = mss;
+ dlen = mss;
seq = conn->seq_to_tap;
for (i = 0; i < send_bufs; i++) {
- int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used;
+ int no_csum = i && i != send_bufs - 1 && tcp4_payload_used;
if (i == send_bufs - 1)
- plen = last_len;
+ dlen = last_len;
- tcp_data_to_tap(c, conn, plen, no_csum, seq);
- seq += plen;
+ tcp_data_to_tap(c, conn, dlen, no_csum, seq);
+ seq += dlen;
}
conn_flag(c, conn, ACK_FROM_TAP_DUE);