diff options
-rw-r--r-- | tcp.c          | 66
-rw-r--r-- | tcp_buf.c      | 18
-rw-r--r-- | tcp_internal.h |  7
-rw-r--r-- | tcp_vu.c       | 17
4 files changed, 82 insertions, 26 deletions
diff --git a/tcp.c b/tcp.c
--- a/tcp.c
+++ b/tcp.c
@@ -399,7 +399,7 @@ static int tcp_sock_ns [NUM_PORTS][IP_VERSIONS];
  */
 static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
 
-char tcp_buf_discard [MAX_WINDOW];
+char tcp_buf_discard [BUF_DISCARD_SIZE];
 
 /* Does the kernel support TCP_PEEK_OFF? */
 bool peek_offset_cap;
@@ -3844,3 +3844,67 @@ fail:
 
 	return 0;
 }
+
+/**
+ * tcp_prepare_iov() - Prepare iov according to kernel capability
+ * @msg: Message header to update
+ * @iov: iovec to receive TCP payload and data to discard
+ * @already_sent: Bytes sent after the last acknowledged one
+ * @payload_iov_cnt: Number of TCP payload iovec entries
+ *
+ * Return: 0 on success, -1 if already_sent cannot be discarded fully
+ */
+int tcp_prepare_iov(struct msghdr *msg, struct iovec *iov,
+		    uint32_t already_sent, int payload_iov_cnt)
+{
+	/*
+	 * IOV layout
+	 * |- tcp_buf_discard -|---------- TCP data slots ------------|
+	 *
+	 * with discarded data:
+	 * |------ddddddddddddd|ttttttttttttt-------------------------|
+	 *        ^
+	 *        |
+	 *     msg_iov
+	 *
+	 * without discarded data:
+	 * |-------------------|ttttttttttttt-------------------------|
+	 *                      ^
+	 *                      |
+	 *                   msg_iov
+	 * d: discard data
+	 * t: TCP data
+	 */
+	if (peek_offset_cap) {
+		msg->msg_iov = iov + DISCARD_IOV_NUM;
+		msg->msg_iovlen = payload_iov_cnt;
+	} else {
+		int discard_cnt, discard_iov_rem;
+		struct iovec *iov_start;
+		int i;
+
+		discard_cnt = DIV_ROUND_UP(already_sent, BUF_DISCARD_SIZE);
+		if (discard_cnt > DISCARD_IOV_NUM) {
+			debug("Failed to discard %u already sent bytes",
+			      already_sent);
+			return -1;
+		}
+
+		discard_iov_rem = already_sent % BUF_DISCARD_SIZE;
+
+		iov_start = iov + (DISCARD_IOV_NUM - discard_cnt);
+
+		/* Multiple iov entries pointing to the same buffer */
+		for (i = 0; i < discard_cnt; i++) {
+			iov_start[i].iov_base = tcp_buf_discard;
+			iov_start[i].iov_len = BUF_DISCARD_SIZE;
+		}
+		if (discard_iov_rem)
+			iov[DISCARD_IOV_NUM - 1].iov_len = discard_iov_rem;
+
+		msg->msg_iov = iov_start;
+		msg->msg_iovlen = discard_cnt + payload_iov_cnt;
+	}
+
+	return 0;
+}
diff --git a/tcp_buf.c b/tcp_buf.c
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -60,7 +60,7 @@ static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
 static unsigned int tcp_payload_used;
 
 /* recvmsg()/sendmsg() data for tap */
-static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
+static struct iovec iov_sock [TCP_FRAMES_MEM + DISCARD_IOV_NUM];
 
 static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 
@@ -326,15 +326,9 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 		iov_rem = (wnd_scaled - already_sent) % mss;
 	}
 
-	/* Prepare iov according to kernel capability */
-	if (!peek_offset_cap) {
-		mh_sock.msg_iov = iov_sock;
-		iov_sock[0].iov_base = tcp_buf_discard;
-		iov_sock[0].iov_len = already_sent;
-		mh_sock.msg_iovlen = fill_bufs + 1;
-	} else {
-		mh_sock.msg_iov = &iov_sock[1];
-		mh_sock.msg_iovlen = fill_bufs;
+	if (tcp_prepare_iov(&mh_sock, iov_sock, already_sent, fill_bufs)) {
+		tcp_rst(c, conn);
+		return -1;
 	}
 
 	if (tcp_payload_used + fill_bufs > TCP_FRAMES_MEM) {
@@ -344,12 +338,12 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 		tcp_payload_used = 0;
 	}
 
-	for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
+	for (i = 0, iov = iov_sock + DISCARD_IOV_NUM; i < fill_bufs; i++, iov++) {
 		iov->iov_base = &tcp_payload[tcp_payload_used + i].data;
 		iov->iov_len = mss;
 	}
 	if (iov_rem)
-		iov_sock[fill_bufs].iov_len = iov_rem;
+		iov_sock[fill_bufs + DISCARD_IOV_NUM - 1].iov_len = iov_rem;
 
 	/* Receive into buffers, don't dequeue until acknowledged by guest. */
 	do
diff --git a/tcp_internal.h b/tcp_internal.h
index 65144a8..5cb6cba 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -9,6 +9,9 @@
 #define MAX_WS 8
 #define MAX_WINDOW (1 << (16 + (MAX_WS)))
 
+#define BUF_DISCARD_SIZE (1 << 20)
+#define DISCARD_IOV_NUM DIV_ROUND_UP(MAX_WINDOW, BUF_DISCARD_SIZE)
+
 #define MSS4 ROUND_DOWN(IP_MAX_MTU - \
 			sizeof(struct tcphdr) - \
 			sizeof(struct iphdr), \
@@ -143,7 +146,7 @@ struct tcp_syn_opts {
 		.ws = TCP_OPT_WS(ws_), \
 	})
 
-extern char tcp_buf_discard [MAX_WINDOW];
+extern char tcp_buf_discard [BUF_DISCARD_SIZE];
 
 void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 		  unsigned long flag);
@@ -184,4 +187,6 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
 		      size_t *optlen);
 int tcp_set_peek_offset(const struct tcp_tap_conn *conn, int offset);
 
+int tcp_prepare_iov(struct msghdr *msg, struct iovec *iov,
+		    uint32_t already_sent, int payload_iov_cnt);
 #endif /* TCP_INTERNAL_H */
diff --git a/tcp_vu.c b/tcp_vu.c
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -35,7 +35,7 @@
 #include "vu_common.h"
 #include <time.h>
 
-static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
+static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM];
 static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
 static int head[VIRTQUEUE_MAX_SIZE + 1];
 
@@ -200,7 +200,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
 
 	hdrlen = tcp_vu_hdrlen(v6);
 
-	vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);
+	vu_init_elem(elem, &iov_vu[DISCARD_IOV_NUM], VIRTQUEUE_MAX_SIZE);
 
 	elem_cnt = 0;
 	*head_cnt = 0;
@@ -228,16 +228,9 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
 		elem_cnt += cnt;
 	}
 
-	if (peek_offset_cap) {
-		mh_sock.msg_iov = iov_vu + 1;
-		mh_sock.msg_iovlen = elem_cnt;
-	} else {
-		iov_vu[0].iov_base = tcp_buf_discard;
-		iov_vu[0].iov_len = already_sent;
-
-		mh_sock.msg_iov = iov_vu;
-		mh_sock.msg_iovlen = elem_cnt + 1;
-	}
+	if (tcp_prepare_iov(&mh_sock, iov_vu, already_sent, elem_cnt))
+		/* Expect caller to do a TCP reset */
+		return -1;
 
 	do
 		ret = recvmsg(s, &mh_sock, MSG_PEEK);