diff options
Diffstat (limited to 'tcp.c')
-rw-r--r-- | tcp.c | 581 |
1 files changed, 274 insertions, 307 deletions
@@ -290,6 +290,7 @@ #include "checksum.h" #include "util.h" +#include "iov.h" #include "ip.h" #include "passt.h" #include "tap.h" @@ -318,39 +319,14 @@ /* MSS rounding: see SET_MSS() */ #define MSS_DEFAULT 536 - -struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */ -#ifdef __AVX2__ - uint8_t pad[26]; -#else - uint8_t pad[2]; -#endif - struct tap_hdr taph; - struct iphdr iph; - struct tcphdr th; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - -struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ -#ifdef __AVX2__ - uint8_t pad[14]; -#else - uint8_t pad[2]; -#endif - struct tap_hdr taph; - struct ipv6hdr ip6h; - struct tcphdr th; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - -#define MSS4 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp4_l2_head), 4) -#define MSS6 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp6_l2_head), 4) +#define MSS4 ROUND_DOWN(IP_MAX_MTU - \ + sizeof(struct tcphdr) - \ + sizeof(struct iphdr), \ + sizeof(uint32_t)) +#define MSS6 ROUND_DOWN(IP_MAX_MTU - \ + sizeof(struct tcphdr) - \ + sizeof(struct ipv6hdr), \ + sizeof(uint32_t)) #define WINDOW_DEFAULT 14600 /* RFC 6928 */ #ifdef HAS_SND_WND @@ -445,133 +421,107 @@ struct tcp_buf_seq_update { }; /* Static buffers */ - /** - * tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections - * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only - * @taph: Tap-level headers (partially pre-filled) - * @iph: Pre-filled IP header (except for tot_len and saddr) - * @uh: Headroom for TCP header - * @data: Storage for TCP payload + * struct tcp_payload_t - TCP header and data to send segments with payload + * @th: TCP header + * @data: TCP data */ -static struct tcp4_l2_buf_t { -#ifdef __AVX2__ - uint8_t pad[26]; /* 0, align th to 32 bytes */ -#else - uint8_t pad[2]; /* align iph to 4 bytes 0 */ -#endif - struct tap_hdr taph; /* 26 2 */ - struct iphdr iph; /* 44 20 */ - struct tcphdr th; /* 64 40 */ - uint8_t data[MSS4]; /* 84 60 */ - /* 65536 65532 */ +struct tcp_payload_t { + struct tcphdr th; + uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)]; #ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) +} __attribute__ ((packed, aligned(32))); /* For AVX2 checksum routines */ #else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); #endif -tcp4_l2_buf[TCP_FRAMES_MEM]; - -static struct tcp_buf_seq_update tcp4_l2_buf_seq_update[TCP_FRAMES_MEM]; - -static unsigned int tcp4_l2_buf_used; /** - * tcp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections - * @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B - * @taph: Tap-level headers (partially pre-filled) - * @ip6h: Pre-filled IP header (except for payload_len and addresses) - * @th: Headroom for TCP header - * @data: Storage for TCP payload + * struct tcp_flags_t - TCP header and data to send zero-length + * segments (flags) + * @th: TCP header + * @opts TCP options */ -struct tcp6_l2_buf_t { -#ifdef __AVX2__ - uint8_t pad[14]; /* 0 align ip6h to 32 bytes */ -#else - uint8_t pad[2]; /* align ip6h to 4 bytes 0 */ -#endif - struct tap_hdr taph; /* 14 2 */ - struct ipv6hdr ip6h; /* 32 20 */ - struct tcphdr th; /* 72 60 */ - uint8_t data[MSS6]; /* 92 80 */ - /* 65536 65532 */ +struct tcp_flags_t { + struct tcphdr th; + char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; #ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) +} __attribute__ ((packed, aligned(32))); #else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); #endif -tcp6_l2_buf[TCP_FRAMES_MEM]; -static struct tcp_buf_seq_update tcp6_l2_buf_seq_update[TCP_FRAMES_MEM]; +/* Ethernet header for IPv4 frames */ +static struct ethhdr tcp4_eth_src; + +static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; +/* IPv4 headers */ +static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; +/* TCP segments with payload for IPv4 frames */ +static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; + +static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516"); + +static struct tcp_buf_seq_update tcp4_seq_update[TCP_FRAMES_MEM]; +static unsigned int tcp4_payload_used; + +static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM]; +/* IPv4 headers for TCP segment without payload */ +static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM]; +/* TCP segments without payload for IPv4 frames */ +static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; + +static unsigned int tcp4_flags_used; + +/* Ethernet header for IPv6 frames */ +static struct ethhdr tcp6_eth_src; + +static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; +/* IPv6 headers */ +static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM]; +/* TCP headers and data for IPv6 frames */ +static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; + +static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516"); + +static struct tcp_buf_seq_update tcp6_seq_update[TCP_FRAMES_MEM]; +static unsigned int tcp6_payload_used; -static unsigned int tcp6_l2_buf_used; +static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM]; +/* IPv6 headers for TCP segment without payload */ +static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM]; +/* TCP segment without payload for IPv6 frames */ +static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM]; + +static unsigned int tcp6_flags_used; /* recvmsg()/sendmsg() data for tap */ static char tcp_buf_discard [MAX_WINDOW]; static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; -static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM]; -static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM]; -static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM]; -static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM]; +/* + * enum tcp_iov_parts - I/O vector parts for one TCP frame + * @TCP_IOV_TAP tap backend specific header + * @TCP_IOV_ETH Ethernet header + * @TCP_IOV_IP IP (v4/v6) header + * @TCP_IOV_PAYLOAD IP payload (TCP header + data) + * @TCP_NUM_IOVS the number of entries in the iovec array + */ +enum tcp_iov_parts { + TCP_IOV_TAP = 0, + TCP_IOV_ETH = 1, + TCP_IOV_IP = 2, + TCP_IOV_PAYLOAD = 3, + TCP_NUM_IOVS +}; + +static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; +static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; +static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; +static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; /* sendmsg() to socket */ static struct iovec tcp_iov [UIO_MAXIOV]; -/** - * tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags) - * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only - * @taph: Tap-level headers (partially pre-filled) - * @iph: Pre-filled IP header (except for tot_len and saddr) - * @th: Headroom for TCP header - * @opts: Headroom for TCP options - */ -static struct tcp4_l2_flags_buf_t { -#ifdef __AVX2__ - uint8_t pad[26]; /* 0, align th to 32 bytes */ -#else - uint8_t pad[2]; /* align iph to 4 bytes 0 */ -#endif - struct tap_hdr taph; /* 26 2 */ - struct iphdr iph; /* 44 20 */ - struct tcphdr th; /* 64 40 */ - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) -#endif -tcp4_l2_flags_buf[TCP_FRAMES_MEM]; - -static unsigned int tcp4_l2_flags_buf_used; - -/** - * tcp6_l2_flags_buf_t - IPv6 packet buffers for segments without data (flags) - * @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B - * @taph: Tap-level headers (partially pre-filled) - * @ip6h: Pre-filled IP header (except for payload_len and addresses) - * @th: Headroom for TCP header - * @opts: Headroom for TCP options - */ -static struct tcp6_l2_flags_buf_t { -#ifdef __AVX2__ - uint8_t pad[14]; /* 0 align ip6h to 32 bytes */ -#else - uint8_t pad[2]; /* align ip6h to 4 bytes 0 */ -#endif - struct tap_hdr taph; /* 14 2 */ - struct ipv6hdr ip6h; /* 32 20 */ - struct tcphdr th /* 72 */ __attribute__ ((aligned(4))); /* 60 */ - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) -#endif -tcp6_l2_flags_buf[TCP_FRAMES_MEM]; - -static unsigned int tcp6_l2_flags_buf_used; - #define CONN(idx) (&(FLOW(idx)->tcp)) /* Table for lookup from remote address, local port, remote port */ @@ -942,13 +892,13 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s) */ static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th) { - uint16_t tlen = ntohs(iph->tot_len) - sizeof(struct iphdr); + uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(tlen, IPPROTO_TCP, saddr, daddr); + uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); th->check = 0; - th->check = csum(th, tlen, sum); + th->check = csum(th, l4len, sum); } /** @@ -958,34 +908,23 @@ static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th) */ static void tcp_update_check_tcp6(struct ipv6hdr *ip6h, struct tcphdr *th) { - uint16_t payload_len = ntohs(ip6h->payload_len); - uint32_t sum = proto_ipv6_header_psum(payload_len, IPPROTO_TCP, + uint16_t l4len = ntohs(ip6h->payload_len); + uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr, &ip6h->daddr); th->check = 0; - th->check = csum(th, payload_len, sum); + th->check = csum(th, l4len, sum); } /** - * tcp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses + * tcp_update_l2_buf() - Update Ethernet header buffers with addresses * @eth_d: Ethernet destination address, NULL if unchanged * @eth_s: Ethernet source address, NULL if unchanged */ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { - int i; - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - struct tcp4_l2_flags_buf_t *b4f = &tcp4_l2_flags_buf[i]; - struct tcp6_l2_flags_buf_t *b6f = &tcp6_l2_flags_buf[i]; - struct tcp4_l2_buf_t *b4 = &tcp4_l2_buf[i]; - struct tcp6_l2_buf_t *b6 = &tcp6_l2_buf[i]; - - eth_update_mac(&b4->taph.eh, eth_d, eth_s); - eth_update_mac(&b6->taph.eh, eth_d, eth_s); - eth_update_mac(&b4f->taph.eh, eth_d, eth_s); - eth_update_mac(&b6f->taph.eh, eth_d, eth_s); - } + eth_update_mac(&tcp4_eth_src, eth_d, eth_s); + eth_update_mac(&tcp6_eth_src, eth_d, eth_s); } /** @@ -998,26 +937,38 @@ static void tcp_sock4_iov_init(const struct ctx *c) struct iovec *iov; int i; - for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) { - tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) { - .taph = TAP_HDR_INIT(ETH_P_IP), - .iph = iph, - .th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 } - }; + tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); + + for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { + tcp4_payload_ip[i] = iph; + tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; + tcp4_payload[i].th.ack = 1; } - for (i = 0; i < ARRAY_SIZE(tcp4_l2_flags_buf); i++) { - tcp4_l2_flags_buf[i] = (struct tcp4_l2_flags_buf_t) { - .taph = TAP_HDR_INIT(ETH_P_IP), - .iph = L2_BUF_IP4_INIT(IPPROTO_TCP) - }; + for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) { + tcp4_flags_ip[i] = iph; + tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4; + tcp4_flags[i].th.ack = 1; + } + + for (i = 0; i < TCP_FRAMES_MEM; i++) { + iov = tcp4_l2_iov[i]; + + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]); + iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]); + iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i]; } - for (i = 0, iov = tcp4_l2_iov; i < TCP_FRAMES_MEM; i++, iov++) - iov->iov_base = tap_frame_base(c, &tcp4_l2_buf[i].taph); + for (i = 0; i < TCP_FRAMES_MEM; i++) { + iov = tcp4_l2_flags_iov[i]; - for (i = 0, iov = tcp4_l2_flags_iov; i < TCP_FRAMES_MEM; i++, iov++) - iov->iov_base = tap_frame_base(c, &tcp4_l2_flags_buf[i].taph); + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]); + iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; + iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]); + iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i]; + } } /** @@ -1026,29 +977,41 @@ static void tcp_sock4_iov_init(const struct ctx *c) */ static void tcp_sock6_iov_init(const struct ctx *c) { + struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP); struct iovec *iov; int i; - for (i = 0; i < ARRAY_SIZE(tcp6_l2_buf); i++) { - tcp6_l2_buf[i] = (struct tcp6_l2_buf_t) { - .taph = TAP_HDR_INIT(ETH_P_IPV6), - .ip6h = L2_BUF_IP6_INIT(IPPROTO_TCP), - .th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 } - }; + tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); + + for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { + tcp6_payload_ip[i] = ip6; + tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; + tcp6_payload[i].th.ack = 1; } - for (i = 0; i < ARRAY_SIZE(tcp6_l2_flags_buf); i++) { - tcp6_l2_flags_buf[i] = (struct tcp6_l2_flags_buf_t) { - .taph = TAP_HDR_INIT(ETH_P_IPV6), - .ip6h = L2_BUF_IP6_INIT(IPPROTO_TCP) - }; + for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) { + tcp6_flags_ip[i] = ip6; + tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4; + tcp6_flags[i].th .ack = 1; + } + + for (i = 0; i < TCP_FRAMES_MEM; i++) { + iov = tcp6_l2_iov[i]; + + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]); + iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]); + iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i]; } - for (i = 0, iov = tcp6_l2_iov; i < TCP_FRAMES_MEM; i++, iov++) - iov->iov_base = tap_frame_base(c, &tcp6_l2_buf[i].taph); + for (i = 0; i < TCP_FRAMES_MEM; i++) { + iov = tcp6_l2_flags_iov[i]; - for (i = 0, iov = tcp6_l2_flags_iov; i < TCP_FRAMES_MEM; i++, iov++) - iov->iov_base = tap_frame_base(c, &tcp6_l2_flags_buf[i].taph); + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]); + iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]); + iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i]; + } } /** @@ -1284,36 +1247,40 @@ static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); } while (0) /** - * tcp_l2_flags_buf_flush() - Send out buffers for segments with no data (flags) + * tcp_flags_flush() - Send out buffers for segments with no data (flags) * @c: Execution context */ -static void tcp_l2_flags_buf_flush(const struct ctx *c) +static void tcp_flags_flush(const struct ctx *c) { - tap_send_frames(c, tcp6_l2_flags_iov, 1, tcp6_l2_flags_buf_used); - tcp6_l2_flags_buf_used = 0; + tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS, + tcp6_flags_used); + tcp6_flags_used = 0; - tap_send_frames(c, tcp4_l2_flags_iov, 1, tcp4_l2_flags_buf_used); - tcp4_l2_flags_buf_used = 0; + tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS, + tcp4_flags_used); + tcp4_flags_used = 0; } /** - * tcp_l2_data_buf_flush() - Send out buffers for segments with data + * tcp_payload_flush() - Send out buffers for segments with data * @c: Execution context */ -static void tcp_l2_data_buf_flush(const struct ctx *c) +static void tcp_payload_flush(const struct ctx *c) { unsigned i; size_t m; - m = tap_send_frames(c, tcp6_l2_iov, 1, tcp6_l2_buf_used); + m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS, + tcp6_payload_used); for (i = 0; i < m; i++) - *tcp6_l2_buf_seq_update[i].seq += tcp6_l2_buf_seq_update[i].len; - tcp6_l2_buf_used = 0; + *tcp6_seq_update[i].seq += tcp6_seq_update[i].len; + tcp6_payload_used = 0; - m = tap_send_frames(c, tcp4_l2_iov, 1, tcp4_l2_buf_used); + m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS, + tcp4_payload_used); for (i = 0; i < m; i++) - *tcp4_l2_buf_seq_update[i].seq += tcp4_l2_buf_seq_update[i].len; - tcp4_l2_buf_used = 0; + *tcp4_seq_update[i].seq += tcp4_seq_update[i].len; + tcp4_payload_used = 0; } /** @@ -1323,8 +1290,8 @@ static void tcp_l2_data_buf_flush(const struct ctx *c) /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */ void tcp_defer_handler(struct ctx *c) { - tcp_l2_flags_buf_flush(c); - tcp_l2_data_buf_flush(c); + tcp_flags_flush(c); + tcp_payload_flush(c); } /** @@ -1354,60 +1321,67 @@ static void tcp_fill_header(struct tcphdr *th, * tcp_fill_headers4() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers * @c: Execution context * @conn: Connection pointer + * @taph: tap backend specific header * @iph: Pointer to IPv4 header * @th: Pointer to TCP header - * @plen: Payload length (including TCP header options) + * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment * - * Return: The total length of the IPv4 packet, host order + * Return: The IPv4 payload length, host order */ static size_t tcp_fill_headers4(const struct ctx *c, const struct tcp_tap_conn *conn, + struct tap_hdr *taph, struct iphdr *iph, struct tcphdr *th, - size_t plen, const uint16_t *check, + size_t dlen, const uint16_t *check, uint32_t seq) { - size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); const struct in_addr *a4 = inany_v4(&conn->faddr); + size_t l4len = dlen + sizeof(*th); + size_t l3len = l4len + sizeof(*iph); ASSERT(a4); - iph->tot_len = htons(ip_len); + iph->tot_len = htons(l3len); iph->saddr = a4->s_addr; iph->daddr = c->ip4.addr_seen.s_addr; iph->check = check ? *check : - csum_ip4_header(iph->tot_len, IPPROTO_TCP, + csum_ip4_header(l3len, IPPROTO_TCP, *a4, c->ip4.addr_seen); tcp_fill_header(th, conn, seq); tcp_update_check_tcp4(iph, th); - return ip_len; + tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); + + return l4len; } /** * tcp_fill_headers6() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers * @c: Execution context * @conn: Connection pointer + * @taph: tap backend specific header * @ip6h: Pointer to IPv6 header * @th: Pointer to TCP header - * @plen: Payload length (including TCP header options) + * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment * - * Return: The total length of the IPv6 packet, host order + * Return: The IPv6 payload length, host order */ static size_t tcp_fill_headers6(const struct ctx *c, const struct tcp_tap_conn *conn, + struct tap_hdr *taph, struct ipv6hdr *ip6h, struct tcphdr *th, - size_t plen, uint32_t seq) + size_t dlen, uint32_t seq) { - size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + size_t l4len = dlen + sizeof(*th); - ip6h->payload_len = htons(plen + sizeof(struct tcphdr)); + ip6h->payload_len = htons(l4len); ip6h->saddr = conn->faddr.a6; if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr)) ip6h->daddr = c->ip6.addr_ll_seen; @@ -1426,45 +1400,40 @@ static size_t tcp_fill_headers6(const struct ctx *c, tcp_update_check_tcp6(ip6h, th); - return ip_len; + tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr)); + + return l4len; } /** * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers * @c: Execution context * @conn: Connection pointer - * @p: Pointer to any type of TCP pre-cooked buffer - * @plen: Payload length (including TCP header options) + * @iov: Pointer to an array of iovec of TCP pre-cooked buffers + * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment * - * Return: frame length including L2 headers, host order + * Return: IP payload length, host order */ static size_t tcp_l2_buf_fill_headers(const struct ctx *c, const struct tcp_tap_conn *conn, - void *p, size_t plen, + struct iovec *iov, size_t dlen, const uint16_t *check, uint32_t seq) { const struct in_addr *a4 = inany_v4(&conn->faddr); - size_t ip_len, tlen; if (a4) { - struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p; - - ip_len = tcp_fill_headers4(c, conn, &b->iph, &b->th, plen, - check, seq); - - tlen = tap_frame_len(c, &b->taph, ip_len); - } else { - struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p; - - ip_len = tcp_fill_headers6(c, conn, &b->ip6h, &b->th, plen, - seq); - - tlen = tap_frame_len(c, &b->taph, ip_len); + return tcp_fill_headers4(c, conn, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_IP].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, dlen, + check, seq); } - return tlen; + return tcp_fill_headers6(c, conn, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_IP].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, dlen, + seq); } /** @@ -1593,18 +1562,15 @@ static void tcp_update_seqack_from_tap(const struct ctx *c, */ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) { - uint32_t prev_ack_to_tap = conn->seq_ack_to_tap; - uint32_t prev_wnd_to_tap = conn->wnd_to_tap; - struct tcp4_l2_flags_buf_t *b4 = NULL; - struct tcp6_l2_flags_buf_t *b6 = NULL; + struct tcp_flags_t *payload; struct tcp_info tinfo = { 0 }; socklen_t sl = sizeof(tinfo); int s = conn->sock; size_t optlen = 0; - struct iovec *iov; struct tcphdr *th; + struct iovec *iov; + size_t l4len; char *data; - void *p; if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) && !flags && conn->wnd_to_tap) @@ -1626,19 +1592,14 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags) return 0; - if (CONN_V4(conn)) { - iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used; - p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++; - th = &b4->th; + if (CONN_V4(conn)) + iov = tcp4_l2_flags_iov[tcp4_flags_used++]; + else + iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - /* gcc 11.2 would complain on data = (char *)(th + 1); */ - data = b4->opts; - } else { - iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used; - p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++; - th = &b6->th; - data = b6->opts; - } + payload = iov[TCP_IOV_PAYLOAD].iov_base; + th = &payload->th; + data = payload->opts; if (flags & SYN) { int mss; @@ -1675,9 +1636,7 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) *data++ = OPT_WS_LEN; *data++ = conn->ws_to_tap; } else if (!(flags & RST)) { - if (conn->seq_ack_to_tap != prev_ack_to_tap || - !prev_wnd_to_tap) - flags |= ACK; + flags |= ACK; } th->doff = (sizeof(*th) + optlen) / 4; @@ -1687,8 +1646,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) th->syn = !!(flags & SYN); th->fin = !!(flags & FIN); - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen, - NULL, conn->seq_to_tap); + l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL, + conn->seq_to_tap); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (th->ack) { if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap)) @@ -1704,24 +1664,27 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (th->fin || th->syn) conn->seq_to_tap++; - if (CONN_V4(conn)) { - if (flags & DUP_ACK) { - memcpy(b4 + 1, b4, sizeof(*b4)); - (iov + 1)->iov_len = iov->iov_len; - tcp4_l2_flags_buf_used++; - } + if (flags & DUP_ACK) { + struct iovec *dup_iov; + int i; - if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2) - tcp_l2_flags_buf_flush(c); - } else { - if (flags & DUP_ACK) { - memcpy(b6 + 1, b6, sizeof(*b6)); - (iov + 1)->iov_len = iov->iov_len; - tcp6_l2_flags_buf_used++; - } + if (CONN_V4(conn)) + dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; + else + dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - if (tcp6_l2_flags_buf_used > ARRAY_SIZE(tcp6_l2_flags_buf) - 2) - tcp_l2_flags_buf_flush(c); + for (i = 0; i < TCP_NUM_IOVS; i++) + memcpy(dup_iov[i].iov_base, iov[i].iov_base, + iov[i].iov_len); + dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len; + } + + if (CONN_V4(conn)) { + if (tcp4_flags_used > TCP_FRAMES_MEM - 2) + tcp_flags_flush(c); + } else { + if (tcp6_flags_used > TCP_FRAMES_MEM - 2) + tcp_flags_flush(c); } return 0; @@ -2159,39 +2122,43 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq) * tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer * @c: Execution context * @conn: Connection pointer - * @plen: Payload length at L4 + * @dlen: TCP payload length * @no_csum: Don't compute IPv4 checksum, use the one from previous buffer * @seq: Sequence number to be sent */ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, - ssize_t plen, int no_csum, uint32_t seq) + ssize_t dlen, int no_csum, uint32_t seq) { uint32_t *seq_update = &conn->seq_to_tap; struct iovec *iov; + size_t l4len; if (CONN_V4(conn)) { - struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used]; - const uint16_t *check = no_csum ? &(b - 1)->iph.check : NULL; + struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; + const uint16_t *check = NULL; - tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update; - tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen; + if (no_csum) { + struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; + check = &iph->check; + } - iov = tcp4_l2_iov + tcp4_l2_buf_used++; - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen, - check, seq); - if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1) - tcp_l2_data_buf_flush(c); - } else if (CONN_V6(conn)) { - struct tcp6_l2_buf_t *b = &tcp6_l2_buf[tcp6_l2_buf_used]; + tcp4_seq_update[tcp4_payload_used].seq = seq_update; + tcp4_seq_update[tcp4_payload_used].len = dlen; - tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update; - tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen; + iov = tcp4_l2_iov[tcp4_payload_used++]; + l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, check, seq); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; + if (tcp4_payload_used > TCP_FRAMES_MEM - 1) + tcp_payload_flush(c); + } else if (CONN_V6(conn)) { + tcp6_seq_update[tcp6_payload_used].seq = seq_update; + tcp6_seq_update[tcp6_payload_used].len = dlen; - iov = tcp6_l2_iov + tcp6_l2_buf_used++; - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen, - NULL, seq); - if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1) - tcp_l2_data_buf_flush(c); + iov = tcp6_l2_iov[tcp6_payload_used++]; + l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, NULL, seq); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; + if (tcp6_payload_used > TCP_FRAMES_MEM - 1) + tcp_payload_flush(c); } } @@ -2208,7 +2175,7 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) { uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap; int fill_bufs, send_bufs = 0, last_len, iov_rem = 0; - int sendlen, len, plen, v4 = CONN_V4(conn); + int sendlen, len, dlen, v4 = CONN_V4(conn); int s = conn->sock, i, ret = 0; struct msghdr mh_sock = { 0 }; uint16_t mss = MSS_GET(conn); @@ -2246,19 +2213,19 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) iov_sock[0].iov_base = tcp_buf_discard; iov_sock[0].iov_len = already_sent; - if (( v4 && tcp4_l2_buf_used + fill_bufs > ARRAY_SIZE(tcp4_l2_buf)) || - (!v4 && tcp6_l2_buf_used + fill_bufs > ARRAY_SIZE(tcp6_l2_buf))) { - tcp_l2_data_buf_flush(c); + if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) || + (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) { + tcp_payload_flush(c); /* Silence Coverity CWE-125 false positive */ - tcp4_l2_buf_used = tcp6_l2_buf_used = 0; + tcp4_payload_used = tcp6_payload_used = 0; } for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) { if (v4) - iov->iov_base = &tcp4_l2_buf[tcp4_l2_buf_used + i].data; + iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; else - iov->iov_base = &tcp6_l2_buf[tcp6_l2_buf_used + i].data; + iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data; iov->iov_len = mss; } if (iov_rem) @@ -2300,16 +2267,16 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) tcp_update_seqack_wnd(c, conn, 0, NULL); /* Finally, queue to tap */ - plen = mss; + dlen = mss; seq = conn->seq_to_tap; for (i = 0; i < send_bufs; i++) { - int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used; + int no_csum = i && i != send_bufs - 1 && tcp4_payload_used; if (i == send_bufs - 1) - plen = last_len; + dlen = last_len; - tcp_data_to_tap(c, conn, plen, no_csum, seq); - seq += plen; + tcp_data_to_tap(c, conn, dlen, no_csum, seq); + seq += dlen; } conn_flag(c, conn, ACK_FROM_TAP_DUE); |