diff options
Diffstat (limited to 'tcp_buf.c')
-rw-r--r-- | tcp_buf.c | 462 |
1 files changed, 190 insertions, 272 deletions
@@ -6,9 +6,9 @@ * PASTA - Pack A Subtle Tap Abstraction * for network namespace/tap device mode * - * tcp_buf.c - TCP L2-L4 translation state machine + * tcp_buf.c - TCP L2 buffer management functions * - * Copyright (c) 2020-2022 Red Hat GmbH + * Copyright Red Hat * Author: Stefano Brivio <sbrivio@redhat.com> */ @@ -20,10 +20,11 @@ #include <netinet/ip.h> -#include <linux/tcp.h> +#include <netinet/tcp.h> #include "util.h" #include "ip.h" +#include "iov.h" #include "passt.h" #include "tap.h" #include "siphash.h" @@ -33,283 +34,201 @@ #include "tcp_buf.h" #define TCP_FRAMES_MEM 128 -#define TCP_FRAMES \ +#define TCP_FRAMES \ (c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM) -/** - * tcp_buf_seq_update - Sequences to update with length of frames once sent - * @seq: Pointer to sequence number sent to tap-side, to be updated - * @len: TCP payload length - */ -struct tcp_buf_seq_update { - uint32_t *seq; - uint16_t len; -}; - /* Static buffers */ -/** - * tcp_l2_flags_t - TCP header and data to send option flags - * @th: TCP header - * @opts TCP option flags - */ -struct tcp_l2_flags_t { - struct tcphdr th; - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; -}; -/** - * tcp_l2_payload_t - TCP header and data to send data - * 32 bytes aligned to be able to use AVX2 checksum - * @th: TCP header - * @data: TCP data - */ -struct tcp_l2_payload_t { - struct tcphdr th; /* 20 bytes */ - uint8_t data[MSS]; /* 65516 bytes */ -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - -/* Ethernet header for IPv4 frames */ -static struct ethhdr tcp4_eth_src; - -/* IPv4 headers */ -static struct iphdr tcp4_l2_ip[TCP_FRAMES_MEM]; -/* TCP headers and data for IPv4 frames */ -static struct tcp_l2_payload_t tcp4_l2_payload[TCP_FRAMES_MEM]; - -static struct tcp_buf_seq_update tcp4_l2_buf_seq_update[TCP_FRAMES_MEM]; -static unsigned int tcp4_l2_buf_used; - -/* IPv4 headers for TCP option flags frames */ -static struct iphdr tcp4_l2_flags_ip[TCP_FRAMES_MEM]; -/* TCP headers and option flags for IPv4 frames */ -static struct tcp_l2_flags_t tcp4_l2_flags[TCP_FRAMES_MEM]; -static unsigned int tcp4_l2_flags_buf_used; - -/* Ethernet header for IPv6 frames */ +/* Ethernet header for IPv4 and IPv6 frames */ +static struct ethhdr tcp4_eth_src; static struct ethhdr tcp6_eth_src; -/* IPv6 headers */ -static struct ipv6hdr tcp6_l2_ip[TCP_FRAMES_MEM]; -/* TCP headers and data for IPv6 frames */ -static struct tcp_l2_payload_t tcp6_l2_payload[TCP_FRAMES_MEM]; +static struct tap_hdr tcp_payload_tap_hdr[TCP_FRAMES_MEM]; + +/* IP headers for IPv4 and IPv6 */ +struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; +struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM]; -static struct tcp_buf_seq_update tcp6_l2_buf_seq_update[TCP_FRAMES_MEM]; -static unsigned int tcp6_l2_buf_used; +/* TCP segments with payload for IPv4 and IPv6 frames */ +static struct tcp_payload_t tcp_payload[TCP_FRAMES_MEM]; -/* IPv6 headers for TCP option flags frames */ -static struct ipv6hdr tcp6_l2_flags_ip[TCP_FRAMES_MEM]; -/* TCP headers and option flags for IPv6 frames */ -static struct tcp_l2_flags_t tcp6_l2_flags[TCP_FRAMES_MEM]; +static_assert(MSS4 <= sizeof(tcp_payload[0].data), "MSS4 is greater than 65516"); +static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516"); -static unsigned int tcp6_l2_flags_buf_used; +/* References tracking the owner connection of frames in the tap outqueue */ +static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM]; +static unsigned int tcp_payload_used; /* recvmsg()/sendmsg() data for tap */ static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; -static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_IOV_NUM]; -static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_IOV_NUM]; -static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_IOV_NUM]; -static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_IOV_NUM]; +static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; /** - * tcp_buf_update_l2() - Update L2 buffers with Ethernet and IPv4 addresses + * tcp_update_l2_buf() - Update Ethernet header buffers with addresses * @eth_d: Ethernet destination address, NULL if unchanged * @eth_s: Ethernet source address, NULL if unchanged */ -void tcp_buf_update_l2(const unsigned char *eth_d, const unsigned char *eth_s) +void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { eth_update_mac(&tcp4_eth_src, eth_d, eth_s); eth_update_mac(&tcp6_eth_src, eth_d, eth_s); } /** - * tcp_buf_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets + * tcp_sock_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets * @c: Execution context */ -void tcp_buf_sock4_iov_init(const struct ctx *c) +void tcp_sock_iov_init(const struct ctx *c) { + struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP); struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP); int i; - (void)c; - + tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); + + for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) { + tcp6_payload_ip[i] = ip6; + tcp4_payload_ip[i] = iph; + } + for (i = 0; i < TCP_FRAMES_MEM; i++) { - struct iovec *iov; - - /* headers */ - tcp4_l2_ip[i] = iph; - tcp4_l2_payload[i].th = (struct tcphdr){ - .doff = sizeof(struct tcphdr) / 4, - .ack = 1 - }; - - tcp4_l2_flags_ip[i] = iph; - tcp4_l2_flags[i].th = (struct tcphdr){ - .doff = sizeof(struct tcphdr) / 4, - .ack = 1 - }; - - /* iovecs */ - iov = tcp4_l2_iov[i]; - iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); - iov[TCP_IOV_IP].iov_base = &tcp4_l2_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_l2_payload[i]; + struct iovec *iov = tcp_l2_iov[i]; - iov = tcp4_l2_flags_iov[i]; - iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]); iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); - iov[TCP_IOV_IP].iov_base = &tcp4_l2_flags_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_l2_flags[i]; + iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i]; } } /** - * tcp_buf_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets - * @c: Execution context + * tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission + * @ctx: Execution context + * @conns: Array of connection pointers corresponding to queued frames + * @frames: Two-dimensional array containing queued frames with sub-iovs + * @num_frames: Number of entries in the two arrays to be compared */ -void tcp_buf_sock6_iov_init(const struct ctx *c) +static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns, + struct iovec (*frames)[TCP_NUM_IOVS], int num_frames) { - struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP); int i; - (void)c; + for (i = 0; i < num_frames; i++) { + const struct tcphdr *th = frames[i][TCP_IOV_PAYLOAD].iov_base; + struct tcp_tap_conn *conn = conns[i]; + uint32_t seq = ntohl(th->seq); + uint32_t peek_offset; - tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); - for (i = 0; i < TCP_FRAMES_MEM; i++) { - struct iovec *iov; - - /* headers */ - tcp6_l2_ip[i] = ip6; - tcp6_l2_payload[i].th = (struct tcphdr){ - .doff = sizeof(struct tcphdr) / 4, - .ack = 1 - }; - - tcp6_l2_flags_ip[i] = ip6; - tcp6_l2_flags[i].th = (struct tcphdr){ - .doff = sizeof(struct tcphdr) / 4, - .ack = 1 - }; - - /* iovecs */ - iov = tcp6_l2_iov[i]; - iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); - iov[TCP_IOV_IP].iov_base = &tcp6_l2_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_l2_payload[i]; + if (SEQ_LE(conn->seq_to_tap, seq)) + continue; - iov = tcp6_l2_flags_iov[i]; - iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); - iov[TCP_IOV_IP].iov_base = &tcp6_l2_flags_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_l2_flags[i]; + conn->seq_to_tap = seq; + peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap; + if (tcp_set_peek_offset(conn->sock, peek_offset)) + tcp_rst(c, conn); } } /** - * tcp_buf_l2_flags_flush() - Send out buffers for segments with no data (flags) + * tcp_payload_flush() - Send out buffers for segments with data or flags * @c: Execution context */ -void tcp_buf_l2_flags_flush(const struct ctx *c) +void tcp_payload_flush(const struct ctx *c) { - tap_send_iov(c, tcp6_l2_flags_iov, tcp6_l2_flags_buf_used); - tcp6_l2_flags_buf_used = 0; + size_t m; - tap_send_iov(c, tcp4_l2_flags_iov, tcp4_l2_flags_buf_used); - tcp4_l2_flags_buf_used = 0; + m = tap_send_frames(c, &tcp_l2_iov[0][0], TCP_NUM_IOVS, + tcp_payload_used); + if (m != tcp_payload_used) { + tcp_revert_seq(c, &tcp_frame_conns[m], &tcp_l2_iov[m], + tcp_payload_used - m); + } + tcp_payload_used = 0; } /** - * tcp_buf_l2_data_flush() - Send out buffers for segments with data - * @c: Execution context + * tcp_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers + * @conn: Connection pointer + * @iov: Pointer to an array of iovec of TCP pre-cooked buffers + * @dlen: TCP payload length + * @check: Checksum, if already known + * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum */ -void tcp_buf_l2_data_flush(const struct ctx *c) +static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, + struct iovec *iov, size_t dlen, + const uint16_t *check, uint32_t seq, + bool no_tcp_csum) { - unsigned i; - size_t m; - - m = tap_send_iov(c, tcp6_l2_iov, tcp6_l2_buf_used); - for (i = 0; i < m; i++) - *tcp6_l2_buf_seq_update[i].seq += tcp6_l2_buf_seq_update[i].len; - tcp6_l2_buf_used = 0; - - m = tap_send_iov(c, tcp4_l2_iov, tcp4_l2_buf_used); - for (i = 0; i < m; i++) - *tcp4_l2_buf_seq_update[i].seq += tcp4_l2_buf_seq_update[i].len; - tcp4_l2_buf_used = 0; + const struct flowside *tapside = TAPFLOW(conn); + const struct in_addr *a4 = inany_v4(&tapside->oaddr); + + if (a4) { + tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_IP].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, dlen, + check, seq, no_tcp_csum); + } else { + tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_IP].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, dlen, + seq, no_tcp_csum); + } } -int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) +/** + * tcp_buf_send_flag() - Send segment with flags to tap (no payload) + * @c: Execution context + * @conn: Connection pointer + * @flags: TCP flags: if not set, send segment only if ACK is due + * + * Return: negative error code on connection reset, 0 otherwise + */ +int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) { - struct tcp_l2_flags_t *payload; - struct iovec *dup_iov; + struct tcp_payload_t *payload; struct iovec *iov; - struct tcphdr *th; - size_t optlen = 0; - size_t ip_len; - char *data; + size_t optlen; + size_t l4len; + uint32_t seq; int ret; + iov = tcp_l2_iov[tcp_payload_used]; if (CONN_V4(conn)) { - iov = tcp4_l2_flags_iov[tcp4_l2_flags_buf_used++]; - dup_iov = tcp4_l2_flags_iov[tcp4_l2_flags_buf_used]; + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]); + iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; } else { - iov = tcp6_l2_flags_iov[tcp6_l2_flags_buf_used++]; - dup_iov = tcp6_l2_flags_iov[tcp6_l2_flags_buf_used]; + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); + iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; } - payload = iov[TCP_IOV_PAYLOAD].iov_base; - th = &payload->th; - data = payload->opts; - ret = tcp_fill_flag_header(c, conn, flags, th, data, &optlen); + payload = iov[TCP_IOV_PAYLOAD].iov_base; + seq = conn->seq_to_tap; + ret = tcp_prepare_flags(c, conn, flags, &payload->th, + (struct tcp_syn_opts *)&payload->data, &optlen); if (ret <= 0) return ret; - if (CONN_V4(conn)) { - struct iphdr *iph = iov[TCP_IOV_IP].iov_base; - - ip_len = tcp_fill_headers4(c, conn, iph, th, optlen, NULL, - conn->seq_to_tap); - } else { - struct ipv6hdr *ip6h = iov[TCP_IOV_IP].iov_base; - - ip_len = tcp_fill_headers6(c, conn, ip6h, th, optlen, - conn->seq_to_tap); - } - iov[TCP_IOV_PAYLOAD].iov_len = ip_len; + tcp_payload_used++; + l4len = optlen + sizeof(struct tcphdr); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; + tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false); if (flags & DUP_ACK) { - int i; - for (i = 0; i < TCP_IOV_NUM; i++) { - memcpy(dup_iov[i].iov_base, iov[i].iov_base, - iov[i].iov_len); - dup_iov[i].iov_len = iov[i].iov_len; - } + struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++]; + + memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_TAP].iov_len); + dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base; + dup_iov[TCP_IOV_IP] = iov[TCP_IOV_IP]; + memcpy(dup_iov[TCP_IOV_PAYLOAD].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, l4len); + dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len; } - if (CONN_V4(conn)) { - if (flags & DUP_ACK) - tcp4_l2_flags_buf_used++; - - if (tcp4_l2_flags_buf_used > TCP_FRAMES_MEM - 2) - tcp_buf_l2_flags_flush(c); - } else { - if (flags & DUP_ACK) - tcp6_l2_flags_buf_used++; - - if (tcp6_l2_flags_buf_used > TCP_FRAMES_MEM - 2) - tcp_buf_l2_flags_flush(c); - } + if (tcp_payload_used > TCP_FRAMES_MEM - 2) + tcp_payload_flush(c); return 0; } @@ -318,49 +237,42 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) * tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer * @c: Execution context * @conn: Connection pointer - * @plen: Payload length at L4 + * @dlen: TCP payload length * @no_csum: Don't compute IPv4 checksum, use the one from previous buffer * @seq: Sequence number to be sent */ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, - ssize_t plen, int no_csum, uint32_t seq) + ssize_t dlen, int no_csum, uint32_t seq) { - uint32_t *seq_update = &conn->seq_to_tap; + struct tcp_payload_t *payload; + const uint16_t *check = NULL; struct iovec *iov; + conn->seq_to_tap = seq + dlen; + tcp_frame_conns[tcp_payload_used] = conn; + iov = tcp_l2_iov[tcp_payload_used]; if (CONN_V4(conn)) { - struct iovec *iov_prev = tcp4_l2_iov[tcp4_l2_buf_used - 1]; - const uint16_t *check = NULL; - if (no_csum) { + struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1]; struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; + check = &iph->check; } - - tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update; - tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen; - - iov = tcp4_l2_iov[tcp4_l2_buf_used++]; - iov[TCP_IOV_PAYLOAD].iov_len = tcp_fill_headers4(c, conn, - iov[TCP_IOV_IP].iov_base, - iov[TCP_IOV_PAYLOAD].iov_base, - plen, check, seq); - - if (tcp4_l2_buf_used > TCP_FRAMES_MEM - 1) - tcp_buf_l2_data_flush(c); + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]); + iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; } else if (CONN_V6(conn)) { - tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update; - tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen; - - iov = tcp6_l2_iov[tcp6_l2_buf_used++]; - iov[TCP_IOV_PAYLOAD].iov_len = tcp_fill_headers6(c, conn, - iov[TCP_IOV_IP].iov_base, - iov[TCP_IOV_PAYLOAD].iov_base, - plen, seq); - - if (tcp6_l2_buf_used > TCP_FRAMES_MEM - 1) - tcp_buf_l2_data_flush(c); + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); + iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; } + payload = iov[TCP_IOV_PAYLOAD].iov_base; + payload->th.th_off = sizeof(struct tcphdr) / 4; + payload->th.th_x2 = 0; + payload->th.th_flags = 0; + payload->th.ack = 1; + iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr); + tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false); + if (++tcp_payload_used > TCP_FRAMES_MEM - 1) + tcp_payload_flush(c); } /** @@ -372,17 +284,17 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, * * #syscalls recvmsg */ -int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) +int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) { uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap; int fill_bufs, send_bufs = 0, last_len, iov_rem = 0; - int sendlen, len, plen, v4 = CONN_V4(conn); - int s = conn->sock, i, ret = 0; + int len, dlen, i, s = conn->sock; struct msghdr mh_sock = { 0 }; uint16_t mss = MSS_GET(conn); uint32_t already_sent, seq; struct iovec *iov; + /* How much have we read/sent since last received ack ? */ already_sent = conn->seq_to_tap - conn->seq_ack_from_tap; if (SEQ_LT(already_sent, 0)) { @@ -391,6 +303,10 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) conn->seq_ack_from_tap, conn->seq_to_tap); conn->seq_to_tap = conn->seq_ack_from_tap; already_sent = 0; + if (tcp_set_peek_offset(s, 0)) { + tcp_rst(c, conn); + return -1; + } } if (!wnd_scaled || already_sent >= wnd_scaled) { @@ -408,25 +324,26 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) iov_rem = (wnd_scaled - already_sent) % mss; } - mh_sock.msg_iov = iov_sock; - mh_sock.msg_iovlen = fill_bufs + 1; - - iov_sock[0].iov_base = tcp_buf_discard; - iov_sock[0].iov_len = already_sent; + /* Prepare iov according to kernel capability */ + if (!peek_offset_cap) { + mh_sock.msg_iov = iov_sock; + iov_sock[0].iov_base = tcp_buf_discard; + iov_sock[0].iov_len = already_sent; + mh_sock.msg_iovlen = fill_bufs + 1; + } else { + mh_sock.msg_iov = &iov_sock[1]; + mh_sock.msg_iovlen = fill_bufs; + } - if (( v4 && tcp4_l2_buf_used + fill_bufs > TCP_FRAMES_MEM) || - (!v4 && tcp6_l2_buf_used + fill_bufs > TCP_FRAMES_MEM)) { - tcp_buf_l2_data_flush(c); + if (tcp_payload_used + fill_bufs > TCP_FRAMES_MEM) { + tcp_payload_flush(c); /* Silence Coverity CWE-125 false positive */ - tcp4_l2_buf_used = tcp6_l2_buf_used = 0; + tcp_payload_used = 0; } for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) { - if (v4) - iov->iov_base = &tcp4_l2_payload[tcp4_l2_buf_used + i].data; - else - iov->iov_base = &tcp6_l2_payload[tcp6_l2_buf_used + i].data; + iov->iov_base = &tcp_payload[tcp_payload_used + i].data; iov->iov_len = mss; } if (iov_rem) @@ -437,12 +354,19 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) len = recvmsg(s, &mh_sock, MSG_PEEK); while (len < 0 && errno == EINTR); - if (len < 0) - goto err; + if (len < 0) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + tcp_rst(c, conn); + return -errno; + } + + return 0; + } if (!len) { if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) { - if ((ret = tcp_buf_send_flag(c, conn, FIN | ACK))) { + int ret = tcp_buf_send_flag(c, conn, FIN | ACK); + if (ret) { tcp_rst(c, conn); return ret; } @@ -453,42 +377,36 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) return 0; } - sendlen = len - already_sent; - if (sendlen <= 0) { + if (!peek_offset_cap) + len -= already_sent; + + if (len <= 0) { conn_flag(c, conn, STALLED); return 0; } conn_flag(c, conn, ~STALLED); - send_bufs = DIV_ROUND_UP(sendlen, mss); - last_len = sendlen - (send_bufs - 1) * mss; + send_bufs = DIV_ROUND_UP(len, mss); + last_len = len - (send_bufs - 1) * mss; /* Likely, some new data was acked too. */ - tcp_update_seqack_wnd(c, conn, 0, NULL); + tcp_update_seqack_wnd(c, conn, false, NULL); /* Finally, queue to tap */ - plen = mss; + dlen = mss; seq = conn->seq_to_tap; for (i = 0; i < send_bufs; i++) { - int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used; + int no_csum = i && i != send_bufs - 1 && tcp_payload_used; if (i == send_bufs - 1) - plen = last_len; + dlen = last_len; - tcp_data_to_tap(c, conn, plen, no_csum, seq); - seq += plen; + tcp_data_to_tap(c, conn, dlen, no_csum, seq); + seq += dlen; } conn_flag(c, conn, ACK_FROM_TAP_DUE); return 0; - -err: - if (errno != EAGAIN && errno != EWOULDBLOCK) { - ret = -errno; - tcp_rst(c, conn); - } - - return ret; } |