From fba2b544b6c60904ac1818fba44aa86772e40ebf Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 13 Jun 2024 14:36:49 +0200 Subject: tcp: move buffers management functions to their own file Move all the TCP parts using internal buffers to tcp_buf.c and keep generic TCP management functions in tcp.c. Add tcp_internal.h to export needed functions from tcp.c and tcp_buf.h from tcp_buf.c With this change we can use existing TCP functions with a different kind of memory storage as for instance the shared memory provided by the guest via vhost-user. Signed-off-by: Laurent Vivier Signed-off-by: Stefano Brivio --- tcp.c | 568 +++--------------------------------------------------------------- 1 file changed, 20 insertions(+), 548 deletions(-) (limited to 'tcp.c') diff --git a/tcp.c b/tcp.c index e40f422..6852423 100644 --- a/tcp.c +++ b/tcp.c @@ -302,28 +302,14 @@ #include "flow.h" #include "flow_table.h" - -#define TCP_FRAMES_MEM 128 -#define TCP_FRAMES \ - (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) +#include "tcp_internal.h" +#include "tcp_buf.h" #define TCP_HASH_TABLE_LOAD 70 /* % */ #define TCP_HASH_TABLE_SIZE (FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD) -#define MAX_WS 8 -#define MAX_WINDOW (1 << (16 + (MAX_WS))) - /* MSS rounding: see SET_MSS() */ #define MSS_DEFAULT 536 -#define MSS4 ROUND_DOWN(IP_MAX_MTU - \ - sizeof(struct tcphdr) - \ - sizeof(struct iphdr), \ - sizeof(uint32_t)) -#define MSS6 ROUND_DOWN(IP_MAX_MTU - \ - sizeof(struct tcphdr) - \ - sizeof(struct ipv6hdr), \ - sizeof(uint32_t)) - #define WINDOW_DEFAULT 14600 /* RFC 6928 */ #ifdef HAS_SND_WND # define KERNEL_REPORTS_SND_WND(c) ((c)->tcp.kernel_snd_wnd) @@ -345,33 +331,10 @@ */ #define SOL_TCP IPPROTO_TCP -#define SEQ_LE(a, b) ((b) - (a) < MAX_WINDOW) -#define SEQ_LT(a, b) ((b) - (a) - 1 < MAX_WINDOW) -#define SEQ_GE(a, b) ((a) - (b) < MAX_WINDOW) -#define SEQ_GT(a, b) ((a) - (b) - 1 < MAX_WINDOW) - -#define FIN (1 << 0) -#define SYN (1 << 1) -#define RST (1 << 2) -#define ACK (1 << 4) -/* Flags for internal usage */ -#define DUP_ACK (1 << 5) #define ACK_IF_NEEDED 0 /* See tcp_send_flag() */ -#define OPT_EOL 0 -#define OPT_NOP 1 -#define OPT_MSS 2 -#define OPT_MSS_LEN 4 -#define OPT_WS 3 -#define OPT_WS_LEN 3 -#define OPT_SACKP 4 -#define OPT_SACK 5 -#define OPT_TS 8 - #define TAPSIDE(conn_) ((conn_)->f.pif[1] == PIF_TAP) -#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr)) -#define CONN_V6(conn) (!CONN_V4(conn)) #define CONN_IS_CLOSING(conn) \ (((conn)->events & ESTABLISHED) && \ ((conn)->events & (SOCK_FIN_RCVD | TAP_FIN_RCVD))) @@ -408,106 +371,7 @@ static int tcp_sock_ns [NUM_PORTS][IP_VERSIONS]; */ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE]; -/* Static buffers */ -/** - * struct tcp_payload_t - TCP header and data to send segments with payload - * @th: TCP header - * @data: TCP data - */ -struct tcp_payload_t { - struct tcphdr th; - uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)]; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); /* For AVX2 checksum routines */ -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - -/** - * struct tcp_flags_t - TCP header and data to send zero-length - * segments (flags) - * @th: TCP header - * @opts TCP options - */ -struct tcp_flags_t { - struct tcphdr th; - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - -/* Ethernet header for IPv4 frames */ -static struct ethhdr tcp4_eth_src; - -static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; -/* IPv4 headers */ -static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; -/* TCP segments with payload for IPv4 frames */ -static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; - -static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516"); - -/* References tracking the owner connection of frames in the tap outqueue */ -static struct tcp_tap_conn *tcp4_frame_conns[TCP_FRAMES_MEM]; -static unsigned int tcp4_payload_used; - -static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM]; -/* IPv4 headers for TCP segment without payload */ -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM]; -/* TCP segments without payload for IPv4 frames */ -static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; - -static unsigned int tcp4_flags_used; - -/* Ethernet header for IPv6 frames */ -static struct ethhdr tcp6_eth_src; - -static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; -/* IPv6 headers */ -static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM]; -/* TCP headers and data for IPv6 frames */ -static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; - -static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516"); - -/* References tracking the owner connection of frames in the tap outqueue */ -static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM]; -static unsigned int tcp6_payload_used; - -static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM]; -/* IPv6 headers for TCP segment without payload */ -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM]; -/* TCP segment without payload for IPv6 frames */ -static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM]; - -static unsigned int tcp6_flags_used; - -/* recvmsg()/sendmsg() data for tap */ -static char tcp_buf_discard [MAX_WINDOW]; -static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; - -/* - * enum tcp_iov_parts - I/O vector parts for one TCP frame - * @TCP_IOV_TAP tap backend specific header - * @TCP_IOV_ETH Ethernet header - * @TCP_IOV_IP IP (v4/v6) header - * @TCP_IOV_PAYLOAD IP payload (TCP header + data) - * @TCP_NUM_IOVS the number of entries in the iovec array - */ -enum tcp_iov_parts { - TCP_IOV_TAP = 0, - TCP_IOV_ETH = 1, - TCP_IOV_IP = 2, - TCP_IOV_PAYLOAD = 3, - TCP_NUM_IOVS -}; - -static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; +char tcp_buf_discard [MAX_WINDOW]; /* sendmsg() to socket */ static struct iovec tcp_iov [UIO_MAXIOV]; @@ -552,14 +416,6 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags) return EPOLLRDHUP; } -static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, - unsigned long flag); -#define conn_flag(c, conn, flag) \ - do { \ - flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \ - conn_flag_do(c, conn, flag); \ - } while (0) - /** * tcp_epoll_ctl() - Add/modify/delete epoll state from connection events * @c: Execution context @@ -671,8 +527,8 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) * @conn: Connection pointer * @flag: Flag to set, or ~flag to unset */ -static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, - unsigned long flag) +void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned long flag) { if (flag & (flag - 1)) { int flag_index = fls(~flag); @@ -722,8 +578,8 @@ static void tcp_hash_remove(const struct ctx *c, * @conn: Connection pointer * @event: Connection event */ -static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn, - unsigned long event) +void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned long event) { int prev, new, num = fls(event); @@ -771,12 +627,6 @@ static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn, tcp_timer_ctl(c, conn); } -#define conn_event(c, conn, event) \ - do { \ - flow_trace(conn, "event at %s:%i", __func__, __LINE__); \ - conn_event_do(c, conn, event); \ - } while (0) - /** * tcp_rtt_dst_low() - Check if low RTT was seen for connection endpoint * @conn: Connection pointer @@ -906,104 +756,6 @@ static void tcp_update_check_tcp6(struct ipv6hdr *ip6h, struct tcphdr *th) th->check = csum(th, l4len, sum); } -/** - * tcp_update_l2_buf() - Update Ethernet header buffers with addresses - * @eth_d: Ethernet destination address, NULL if unchanged - * @eth_s: Ethernet source address, NULL if unchanged - */ -void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) -{ - eth_update_mac(&tcp4_eth_src, eth_d, eth_s); - eth_update_mac(&tcp6_eth_src, eth_d, eth_s); -} - -/** - * tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets - * @c: Execution context - */ -static void tcp_sock4_iov_init(const struct ctx *c) -{ - struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP); - struct iovec *iov; - int i; - - tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); - - for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { - tcp4_payload_ip[i] = iph; - tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; - tcp4_payload[i].th.ack = 1; - } - - for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) { - tcp4_flags_ip[i] = iph; - tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4; - tcp4_flags[i].th.ack = 1; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp4_l2_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i]; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp4_l2_flags_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]); - iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i]; - } -} - -/** - * tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets - * @c: Execution context - */ -static void tcp_sock6_iov_init(const struct ctx *c) -{ - struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP); - struct iovec *iov; - int i; - - tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); - - for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload_ip[i] = ip6; - tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; - tcp6_payload[i].th.ack = 1; - } - - for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) { - tcp6_flags_ip[i] = ip6; - tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4; - tcp6_flags[i].th .ack = 1; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp6_l2_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i]; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp6_l2_flags_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i]; - } -} - /** * tcp_opt_get() - Get option, and value if any, from TCP header * @opts: Pointer to start of TCP options in header @@ -1227,76 +979,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn) return true; } -static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); -#define tcp_rst(c, conn) \ - do { \ - flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \ - tcp_rst_do(c, conn); \ - } while (0) - -/** - * tcp_flags_flush() - Send out buffers for segments with no data (flags) - * @c: Execution context - */ -static void tcp_flags_flush(const struct ctx *c) -{ - tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS, - tcp6_flags_used); - tcp6_flags_used = 0; - - tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS, - tcp4_flags_used); - tcp4_flags_used = 0; -} - -/** - * tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission - * @conns: Array of connection pointers corresponding to queued frames - * @frames: Two-dimensional array containing queued frames with sub-iovs - * @num_frames: Number of entries in the two arrays to be compared - */ -static void tcp_revert_seq(struct tcp_tap_conn **conns, struct iovec (*frames)[TCP_NUM_IOVS], - int num_frames) -{ - int i; - - for (i = 0; i < num_frames; i++) { - const struct tcphdr *th = frames[i][TCP_IOV_PAYLOAD].iov_base; - struct tcp_tap_conn *conn = conns[i]; - uint32_t seq = ntohl(th->seq); - - if (SEQ_LE(conn->seq_to_tap, seq)) - continue; - - conn->seq_to_tap = seq; - } -} - -/** - * tcp_payload_flush() - Send out buffers for segments with data - * @c: Execution context - */ -static void tcp_payload_flush(const struct ctx *c) -{ - size_t m; - - m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS, - tcp6_payload_used); - if (m != tcp6_payload_used) { - tcp_revert_seq(&tcp6_frame_conns[m], &tcp6_l2_iov[m], - tcp6_payload_used - m); - } - tcp6_payload_used = 0; - - m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS, - tcp4_payload_used); - if (m != tcp4_payload_used) { - tcp_revert_seq(&tcp4_frame_conns[m], &tcp4_l2_iov[m], - tcp4_payload_used - m); - } - tcp4_payload_used = 0; -} - /** * tcp_defer_handler() - Handler for TCP deferred tasks * @c: Execution context @@ -1430,10 +1112,10 @@ static size_t tcp_fill_headers6(const struct ctx *c, * * Return: IP payload length, host order */ -static size_t tcp_l2_buf_fill_headers(const struct ctx *c, - const struct tcp_tap_conn *conn, - struct iovec *iov, size_t dlen, - const uint16_t *check, uint32_t seq) +size_t tcp_l2_buf_fill_headers(const struct ctx *c, + const struct tcp_tap_conn *conn, + struct iovec *iov, size_t dlen, + const uint16_t *check, uint32_t seq) { const struct in_addr *a4 = inany_v4(&conn->faddr); @@ -1459,8 +1141,8 @@ static size_t tcp_l2_buf_fill_headers(const struct ctx *c, * * Return: 1 if sequence or window were updated, 0 otherwise */ -static int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, - int force_seq, struct tcp_info *tinfo) +int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, + int force_seq, struct tcp_info *tinfo) { uint32_t prev_wnd_to_tap = conn->wnd_to_tap << conn->ws_to_tap; uint32_t prev_ack_to_tap = conn->seq_ack_to_tap; @@ -1579,9 +1261,9 @@ static void tcp_update_seqack_from_tap(const struct ctx *c, * 0 if there is no flag to send * 1 otherwise */ -static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, - int flags, struct tcphdr *th, char *data, - size_t *optlen) +int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, + int flags, struct tcphdr *th, char *data, + size_t *optlen) { struct tcp_info tinfo = { 0 }; socklen_t sl = sizeof(tinfo); @@ -1678,60 +1360,9 @@ static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, * * Return: negative error code on connection reset, 0 otherwise */ -static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) +int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) { - struct tcp_flags_t *payload; - struct iovec *iov; - size_t optlen; - size_t l4len; - uint32_t seq; - int ret; - - if (CONN_V4(conn)) - iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - - payload = iov[TCP_IOV_PAYLOAD].iov_base; - - seq = conn->seq_to_tap; - ret = tcp_prepare_flags(c, conn, flags, &payload->th, - payload->opts, &optlen); - if (ret <= 0) { - if (CONN_V4(conn)) - tcp4_flags_used--; - else - tcp6_flags_used--; - return ret; - } - - l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - - if (flags & DUP_ACK) { - struct iovec *dup_iov; - int i; - - if (CONN_V4(conn)) - dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - - for (i = 0; i < TCP_NUM_IOVS; i++) - memcpy(dup_iov[i].iov_base, iov[i].iov_base, - iov[i].iov_len); - dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len; - } - - if (CONN_V4(conn)) { - if (tcp4_flags_used > TCP_FRAMES_MEM - 2) - tcp_flags_flush(c); - } else { - if (tcp6_flags_used > TCP_FRAMES_MEM - 2) - tcp_flags_flush(c); - } - - return 0; + return tcp_buf_send_flag(c, conn, flags); } /** @@ -1739,7 +1370,7 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) * @c: Execution context * @conn: Connection pointer */ -static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn) +void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn) { if (conn->events == CLOSED) return; @@ -2166,49 +1797,6 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq) return 0; } -/** - * tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer - * @c: Execution context - * @conn: Connection pointer - * @dlen: TCP payload length - * @no_csum: Don't compute IPv4 checksum, use the one from previous buffer - * @seq: Sequence number to be sent - */ -static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, - ssize_t dlen, int no_csum, uint32_t seq) -{ - struct iovec *iov; - size_t l4len; - - conn->seq_to_tap = seq + dlen; - - if (CONN_V4(conn)) { - struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; - const uint16_t *check = NULL; - - if (no_csum) { - struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; - check = &iph->check; - } - - tcp4_frame_conns[tcp4_payload_used] = conn; - - iov = tcp4_l2_iov[tcp4_payload_used++]; - l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, check, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp4_payload_used > TCP_FRAMES_MEM - 1) - tcp_payload_flush(c); - } else if (CONN_V6(conn)) { - tcp6_frame_conns[tcp6_payload_used] = conn; - - iov = tcp6_l2_iov[tcp6_payload_used++]; - l4len = tcp_l2_buf_fill_headers(c, conn, iov, dlen, NULL, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp6_payload_used > TCP_FRAMES_MEM - 1) - tcp_payload_flush(c); - } -} - /** * tcp_data_from_sock() - Handle new data from socket, queue to tap, in window * @c: Execution context @@ -2220,123 +1808,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, */ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) { - uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap; - int fill_bufs, send_bufs = 0, last_len, iov_rem = 0; - int sendlen, len, dlen, v4 = CONN_V4(conn); - int s = conn->sock, i, ret = 0; - struct msghdr mh_sock = { 0 }; - uint16_t mss = MSS_GET(conn); - uint32_t already_sent, seq; - struct iovec *iov; - - already_sent = conn->seq_to_tap - conn->seq_ack_from_tap; - - if (SEQ_LT(already_sent, 0)) { - /* RFC 761, section 2.1. */ - flow_trace(conn, "ACK sequence gap: ACK for %u, sent: %u", - conn->seq_ack_from_tap, conn->seq_to_tap); - conn->seq_to_tap = conn->seq_ack_from_tap; - already_sent = 0; - } - - if (!wnd_scaled || already_sent >= wnd_scaled) { - conn_flag(c, conn, STALLED); - conn_flag(c, conn, ACK_FROM_TAP_DUE); - return 0; - } - - /* Set up buffer descriptors we'll fill completely and partially. */ - fill_bufs = DIV_ROUND_UP(wnd_scaled - already_sent, mss); - if (fill_bufs > TCP_FRAMES) { - fill_bufs = TCP_FRAMES; - iov_rem = 0; - } else { - iov_rem = (wnd_scaled - already_sent) % mss; - } - - mh_sock.msg_iov = iov_sock; - mh_sock.msg_iovlen = fill_bufs + 1; - - iov_sock[0].iov_base = tcp_buf_discard; - iov_sock[0].iov_len = already_sent; - - if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) || - (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) { - tcp_payload_flush(c); - - /* Silence Coverity CWE-125 false positive */ - tcp4_payload_used = tcp6_payload_used = 0; - } - - for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) { - if (v4) - iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; - else - iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data; - iov->iov_len = mss; - } - if (iov_rem) - iov_sock[fill_bufs].iov_len = iov_rem; - - /* Receive into buffers, don't dequeue until acknowledged by guest. */ - do - len = recvmsg(s, &mh_sock, MSG_PEEK); - while (len < 0 && errno == EINTR); - - if (len < 0) - goto err; - - if (!len) { - if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) { - if ((ret = tcp_send_flag(c, conn, FIN | ACK))) { - tcp_rst(c, conn); - return ret; - } - - conn_event(c, conn, TAP_FIN_SENT); - } - - return 0; - } - - sendlen = len - already_sent; - if (sendlen <= 0) { - conn_flag(c, conn, STALLED); - return 0; - } - - conn_flag(c, conn, ~STALLED); - - send_bufs = DIV_ROUND_UP(sendlen, mss); - last_len = sendlen - (send_bufs - 1) * mss; - - /* Likely, some new data was acked too. */ - tcp_update_seqack_wnd(c, conn, 0, NULL); - - /* Finally, queue to tap */ - dlen = mss; - seq = conn->seq_to_tap; - for (i = 0; i < send_bufs; i++) { - int no_csum = i && i != send_bufs - 1 && tcp4_payload_used; - - if (i == send_bufs - 1) - dlen = last_len; - - tcp_data_to_tap(c, conn, dlen, no_csum, seq); - seq += dlen; - } - - conn_flag(c, conn, ACK_FROM_TAP_DUE); - - return 0; - -err: - if (errno != EAGAIN && errno != EWOULDBLOCK) { - ret = -errno; - tcp_rst(c, conn); - } - - return ret; + return tcp_buf_data_from_sock(c, conn); } /** -- cgit v1.2.3