aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--tcp.c66
-rw-r--r--tcp_buf.c18
-rw-r--r--tcp_internal.h7
-rw-r--r--tcp_vu.c17
4 files changed, 82 insertions, 26 deletions
diff --git a/tcp.c b/tcp.c
index b35f220..48b1ef2 100644
--- a/tcp.c
+++ b/tcp.c
@@ -399,7 +399,7 @@ static int tcp_sock_ns [NUM_PORTS][IP_VERSIONS];
*/
static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
-char tcp_buf_discard [MAX_WINDOW];
+char tcp_buf_discard [BUF_DISCARD_SIZE];
/* Does the kernel support TCP_PEEK_OFF? */
bool peek_offset_cap;
@@ -3844,3 +3844,67 @@ fail:
return 0;
}
+
+/**
+ * tcp_prepare_iov() - Prepare iov according to kernel capability
+ * @msg: Message header to update (msg_iov and msg_iovlen are set)
+ * @iov: iovec to receive TCP payload and data to discard: the first
+ *       DISCARD_IOV_NUM entries are reserved for data to discard, the
+ *       payload entries follow them and are not modified here
+ * @already_sent: Bytes sent after the last acknowledged one, only used
+ *                if peek_offset_cap is not set
+ * @payload_iov_cnt: Number of TCP payload iovec entries
+ *
+ * If peek_offset_cap is set, @msg covers the payload entries only.
+ * Otherwise, as many discard entries as needed to hold @already_sent
+ * bytes, all pointing to tcp_buf_discard, are set up right before the
+ * payload entries, and @msg starts from the first one of them. The last
+ * discard entry is shortened if @already_sent is not a multiple of
+ * BUF_DISCARD_SIZE.
+ *
+ * NOTE(review): DIV_ROUND_UP() is defined elsewhere. If it expands to
+ * (n + d - 1) / d, the sum is computed on 32 bits and would wrap for
+ * @already_sent values close to UINT32_MAX, so the DISCARD_IOV_NUM check
+ * would not trigger. Presumably callers bound @already_sent by the
+ * window size -- TODO confirm.
+ *
+ * Return: 0 on success, -1 if already_sent cannot be discarded fully
+ *         (@msg is left untouched in that case)
+ */
+int tcp_prepare_iov(struct msghdr *msg, struct iovec *iov,
+ uint32_t already_sent, int payload_iov_cnt)
+{
+ /*
+ * IOV layout
+ * |- tcp_buf_discard -|---------- TCP data slots ------------|
+ *
+ * with discarded data:
+ * |------ddddddddddddd|ttttttttttttt-------------------------|
+ *        ^
+ *        |
+ *     msg_iov
+ *
+ * without discarded data:
+ * |-------------------|ttttttttttttt-------------------------|
+ *                      ^
+ *                      |
+ *                   msg_iov
+ * d: discard data
+ * t: TCP data
+ */
+ if (peek_offset_cap) {
+ msg->msg_iov = iov + DISCARD_IOV_NUM;
+ msg->msg_iovlen = payload_iov_cnt;
+ } else {
+ int discard_cnt, discard_iov_rem;
+ struct iovec *iov_start;
+ int i;
+
+ discard_cnt = DIV_ROUND_UP(already_sent, BUF_DISCARD_SIZE);
+ if (discard_cnt > DISCARD_IOV_NUM) {
+ debug("Failed to discard %u already sent bytes",
+ already_sent);
+ return -1;
+ }
+
+ discard_iov_rem = already_sent % BUF_DISCARD_SIZE;
+
+ iov_start = iov + (DISCARD_IOV_NUM - discard_cnt);
+
+ /* Multiple iov entries pointing to the same buffer */
+ for (i = 0; i < discard_cnt; i++) {
+ iov_start[i].iov_base = tcp_buf_discard;
+ iov_start[i].iov_len = BUF_DISCARD_SIZE;
+ }
+ if (discard_iov_rem)
+ iov[DISCARD_IOV_NUM - 1].iov_len = discard_iov_rem;
+
+ msg->msg_iov = iov_start;
+ msg->msg_iovlen = discard_cnt + payload_iov_cnt;
+ }
+
+ return 0;
+}
diff --git a/tcp_buf.c b/tcp_buf.c
index b02d986..49bddbe 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -60,7 +60,7 @@ static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
static unsigned int tcp_payload_used;
/* recvmsg()/sendmsg() data for tap */
-static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
+static struct iovec iov_sock [TCP_FRAMES_MEM + DISCARD_IOV_NUM];
static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
@@ -326,15 +326,9 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
iov_rem = (wnd_scaled - already_sent) % mss;
}
- /* Prepare iov according to kernel capability */
- if (!peek_offset_cap) {
- mh_sock.msg_iov = iov_sock;
- iov_sock[0].iov_base = tcp_buf_discard;
- iov_sock[0].iov_len = already_sent;
- mh_sock.msg_iovlen = fill_bufs + 1;
- } else {
- mh_sock.msg_iov = &iov_sock[1];
- mh_sock.msg_iovlen = fill_bufs;
+ if (tcp_prepare_iov(&mh_sock, iov_sock, already_sent, fill_bufs)) {
+ tcp_rst(c, conn);
+ return -1;
}
if (tcp_payload_used + fill_bufs > TCP_FRAMES_MEM) {
@@ -344,12 +338,12 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
tcp_payload_used = 0;
}
- for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
+ for (i = 0, iov = iov_sock + DISCARD_IOV_NUM; i < fill_bufs; i++, iov++) {
iov->iov_base = &tcp_payload[tcp_payload_used + i].data;
iov->iov_len = mss;
}
if (iov_rem)
- iov_sock[fill_bufs].iov_len = iov_rem;
+ iov_sock[fill_bufs + DISCARD_IOV_NUM - 1].iov_len = iov_rem;
/* Receive into buffers, don't dequeue until acknowledged by guest. */
do
diff --git a/tcp_internal.h b/tcp_internal.h
index 65144a8..5cb6cba 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -9,6 +9,9 @@
#define MAX_WS 8
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
+#define BUF_DISCARD_SIZE (1 << 20)
+#define DISCARD_IOV_NUM DIV_ROUND_UP(MAX_WINDOW, BUF_DISCARD_SIZE)
+
#define MSS4 ROUND_DOWN(IP_MAX_MTU - \
sizeof(struct tcphdr) - \
sizeof(struct iphdr), \
@@ -143,7 +146,7 @@ struct tcp_syn_opts {
.ws = TCP_OPT_WS(ws_), \
})
-extern char tcp_buf_discard [MAX_WINDOW];
+extern char tcp_buf_discard [BUF_DISCARD_SIZE];
void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
unsigned long flag);
@@ -184,4 +187,6 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
size_t *optlen);
int tcp_set_peek_offset(const struct tcp_tap_conn *conn, int offset);
+int tcp_prepare_iov(struct msghdr *msg, struct iovec *iov,
+ uint32_t already_sent, int payload_iov_cnt);
#endif /* TCP_INTERNAL_H */
diff --git a/tcp_vu.c b/tcp_vu.c
index c6b5b91..ebd3a1e 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -35,7 +35,7 @@
#include "vu_common.h"
#include <time.h>
-static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
+static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM];
static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
static int head[VIRTQUEUE_MAX_SIZE + 1];
@@ -200,7 +200,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
hdrlen = tcp_vu_hdrlen(v6);
- vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);
+ vu_init_elem(elem, &iov_vu[DISCARD_IOV_NUM], VIRTQUEUE_MAX_SIZE);
elem_cnt = 0;
*head_cnt = 0;
@@ -228,16 +228,9 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
elem_cnt += cnt;
}
- if (peek_offset_cap) {
- mh_sock.msg_iov = iov_vu + 1;
- mh_sock.msg_iovlen = elem_cnt;
- } else {
- iov_vu[0].iov_base = tcp_buf_discard;
- iov_vu[0].iov_len = already_sent;
-
- mh_sock.msg_iov = iov_vu;
- mh_sock.msg_iovlen = elem_cnt + 1;
- }
+ if (tcp_prepare_iov(&mh_sock, iov_vu, already_sent, elem_cnt))
+ /* Expect caller to do a TCP reset */
+ return -1;
do
ret = recvmsg(s, &mh_sock, MSG_PEEK);