about git code bugs lists chat
path: root/tcp_vu.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcp_vu.c')
-rw-r--r-- tcp_vu.c 173
1 files changed, 112 insertions, 61 deletions
diff --git a/tcp_vu.c b/tcp_vu.c
index c13c45f..55242ec 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -35,9 +35,24 @@
#include "vu_common.h"
#include <time.h>
-static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM];
+static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE];
static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
-static int head[VIRTQUEUE_MAX_SIZE + 1];
+
+/**
+ * struct vu_frame - Descriptor for a TCP frame mapped to virtqueue elements
+ * @idx_element: Index of first element in elem[] for this frame
+ * @num_element: Number of virtqueue elements used by this frame
+ * @idx_iovec: Index of first iovec in iov_vu[] for this frame
+ * @num_iovec: Number of iovecs covering this frame's buffers
+ * @size: Total frame size including all headers
+ */
+static struct vu_frame {
+ int idx_element;
+ int num_element;
+ int idx_iovec;
+ int num_iovec;
+ size_t size;
+} frame[VIRTQUEUE_MAX_SIZE];
/**
* tcp_vu_hdrlen() - Sum size of all headers, from TCP to virtio-net
@@ -176,8 +191,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
* @v6: Set for IPv6 connections
* @already_sent: Number of bytes already sent
* @fillsize: Maximum bytes to fill in guest-side receiving window
- * @iov_cnt: number of iov (output)
- * @head_cnt: Pointer to store the count of head iov entries (output)
+ * @elem_used: Pointer to store the number of elements used (output)
+ * @frame_cnt: Pointer to store the number of frames (output)
*
* Return: number of bytes received from the socket, or a negative error code
* on failure.
@@ -185,57 +200,78 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
const struct tcp_tap_conn *conn, bool v6,
uint32_t already_sent, size_t fillsize,
- int *iov_cnt, int *head_cnt)
+ int *elem_used, int *frame_cnt)
{
+ static struct iovec iov_msg[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM];
const struct vu_dev *vdev = c->vdev;
struct msghdr mh_sock = { 0 };
uint16_t mss = MSS_GET(conn);
size_t hdrlen, iov_used;
int s = conn->sock;
+ ssize_t ret, dlen;
int elem_cnt;
- ssize_t ret;
- int i;
-
- *iov_cnt = 0;
+ int i, j;
hdrlen = tcp_vu_hdrlen(v6);
+ *elem_used = 0;
+
iov_used = 0;
elem_cnt = 0;
- *head_cnt = 0;
+ *frame_cnt = 0;
while (fillsize > 0 && elem_cnt < ARRAY_SIZE(elem) &&
- iov_used < VIRTQUEUE_MAX_SIZE) {
- size_t frame_size, dlen, in_total;
- struct iovec *iov;
+ iov_used < ARRAY_SIZE(iov_vu) &&
+ *frame_cnt < ARRAY_SIZE(frame)) {
+ size_t frame_size, in_total;
int cnt;
cnt = vu_collect(vdev, vq, &elem[elem_cnt],
ARRAY_SIZE(elem) - elem_cnt,
- &iov_vu[DISCARD_IOV_NUM + iov_used],
- VIRTQUEUE_MAX_SIZE - iov_used, &in_total,
+ &iov_vu[iov_used],
+ ARRAY_SIZE(iov_vu) - iov_used, &in_total,
MIN(mss, fillsize) + hdrlen,
&frame_size);
if (cnt == 0)
break;
- assert((size_t)cnt == in_total); /* one iovec per element */
+
+ frame[*frame_cnt].idx_element = elem_cnt;
+ frame[*frame_cnt].num_element = cnt;
+ frame[*frame_cnt].idx_iovec = iov_used;
+ frame[*frame_cnt].num_iovec = in_total;
+ frame[*frame_cnt].size = frame_size;
+ (*frame_cnt)++;
iov_used += in_total;
- dlen = frame_size - hdrlen;
+ elem_cnt += cnt;
- /* reserve space for headers in iov */
- iov = &elem[elem_cnt].in_sg[0];
- assert(iov->iov_len >= hdrlen);
- iov->iov_base = (char *)iov->iov_base + hdrlen;
- iov->iov_len -= hdrlen;
- head[(*head_cnt)++] = elem_cnt;
+ fillsize -= frame_size - hdrlen;
+ }
- fillsize -= dlen;
- elem_cnt += cnt;
+ /* build an iov array without headers */
+ for (i = 0, j = DISCARD_IOV_NUM; i < *frame_cnt &&
+ j < ARRAY_SIZE(iov_msg); i++) {
+ struct iov_tail data;
+ ssize_t cnt;
+
+ data = IOV_TAIL(&iov_vu[frame[i].idx_iovec],
+ frame[i].num_iovec, 0);
+ iov_drop_header(&data, hdrlen);
+
+ cnt = iov_tail_clone(&iov_msg[j], ARRAY_SIZE(iov_msg) - j,
+ &data);
+ assert(cnt < ARRAY_SIZE(iov_msg) - j);
+ if (cnt < 0)
+ die("Missing entries in iov_msg");
+
+ j += cnt;
}
- if (tcp_prepare_iov(&mh_sock, iov_vu, already_sent, elem_cnt))
+ if (tcp_prepare_iov(&mh_sock, iov_msg, already_sent,
+ j - DISCARD_IOV_NUM)) {
/* Expect caller to do a TCP reset */
+ vu_queue_rewind(vq, elem_cnt);
return -1;
+ }
do
ret = recvmsg(s, &mh_sock, MSG_PEEK);
@@ -249,32 +285,49 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
if (!peek_offset_cap)
ret -= already_sent;
- i = iov_skip_bytes(&iov_vu[DISCARD_IOV_NUM], iov_used,
- MAX(hdrlen + ret, VNET_HLEN + ETH_ZLEN),
- NULL);
- if ((size_t)i < iov_used)
- i++;
+ dlen = ret;
- /* adjust head count */
- while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
- (*head_cnt)--;
+ /* truncate frame */
+ for (i = 0; i < *frame_cnt; i++) {
+ struct vu_frame *f = &frame[i];
- /* mark end of array */
- head[*head_cnt] = i;
- *iov_cnt = i;
+ if ((size_t)ret <= f->size - hdrlen) {
+ unsigned cnt;
- /* release unused buffers */
- vu_queue_rewind(vq, elem_cnt - i);
+ cnt = iov_skip_bytes(&iov_vu[f->idx_iovec], f->num_iovec,
+ MAX(hdrlen + ret, VNET_HLEN + ETH_ZLEN),
+ NULL);
+ if (cnt < (unsigned)f->num_iovec)
+ cnt++;
+
+ f->size = ret + hdrlen;
+ f->num_iovec = cnt;
- /* restore space for headers in iov */
- for (i = 0; i < *head_cnt; i++) {
- struct iovec *iov = &elem[head[i]].in_sg[0];
+ for (j = 0; j < f->num_element; j++) {
+ struct vu_virtq_element *e;
- iov->iov_base = (char *)iov->iov_base - hdrlen;
- iov->iov_len += hdrlen;
+ e = &elem[f->idx_element + j];
+ if (cnt <= e->in_num) {
+ e->in_num = cnt;
+ j++;
+ break;
+ }
+ cnt -= e->in_num;
+ }
+ f->num_element = j;
+ *elem_used += j;
+ i++;
+ break;
+ }
+ *elem_used += f->num_element;
+ ret -= f->size - hdrlen;
}
+ *frame_cnt = i;
- return ret;
+ /* release unused buffers */
+ vu_queue_rewind(vq, elem_cnt - *elem_used);
+
+ return dlen;
}
/**
@@ -350,7 +403,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
uint32_t already_sent, check;
ssize_t len, previous_dlen;
- int i, iov_cnt, head_cnt;
+ int i, elem_cnt, frame_cnt;
size_t hdrlen, fillsize;
int v6 = CONN_V6(conn);
@@ -388,7 +441,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
* data from the socket
*/
len = tcp_vu_sock_recv(c, vq, conn, v6, already_sent, fillsize,
- &iov_cnt, &head_cnt);
+ &elem_cnt, &frame_cnt);
if (len < 0) {
if (len != -EAGAIN && len != -EWOULDBLOCK) {
tcp_rst(c, conn);
@@ -402,6 +455,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
}
if (!len) {
+ vu_queue_rewind(vq, elem_cnt);
if (already_sent) {
conn_flag(c, conn, STALLED);
} else if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) ==
@@ -442,34 +496,31 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
check = IP4_CSUM;
if (*c->pcap)
check |= TCP_CSUM;
- for (i = 0, previous_dlen = -1; i < head_cnt; i++) {
- struct iovec *iov = &elem[head[i]].in_sg[0];
- int buf_cnt = head[i + 1] - head[i];
- size_t frame_size = iov_size(iov, buf_cnt);
- bool push = i == head_cnt - 1;
+ for (i = 0, previous_dlen = -1; i < frame_cnt; i++) {
+ struct iovec *iov = &iov_vu[frame[i].idx_iovec];
+ int iov_cnt = frame[i].num_iovec;
+ bool push = i == frame_cnt - 1;
ssize_t dlen;
- assert(frame_size >= hdrlen);
+ assert(frame[i].size >= hdrlen);
- dlen = frame_size - hdrlen;
- if (dlen > len)
- dlen = len;
- len -= dlen;
+ dlen = frame[i].size - hdrlen;
/* The IPv4 header checksum varies only with dlen */
if (previous_dlen != dlen)
check |= IP4_CSUM;
previous_dlen = dlen;
- tcp_vu_prepare(c, conn, iov, buf_cnt, dlen, &check, push);
+ tcp_vu_prepare(c, conn, iov, iov_cnt, dlen, &check, push);
- vu_pad(elem[head[i]].in_sg, buf_cnt, dlen + hdrlen);
- vu_flush(vdev, vq, &elem[head[i]], buf_cnt, dlen + hdrlen);
+ vu_pad(iov, iov_cnt, dlen + hdrlen);
if (*c->pcap) {
- pcap_iov(iov, buf_cnt, VNET_HLEN,
+ pcap_iov(iov, iov_cnt, VNET_HLEN,
dlen + hdrlen - VNET_HLEN);
}
+ vu_flush(vdev, vq, &elem[frame[i].idx_element],
+ frame[i].num_element, dlen + hdrlen);
conn->seq_to_tap += dlen;
}