diff options
Diffstat (limited to 'tcp_vu.c')
| -rw-r--r-- | tcp_vu.c | 173 |
1 files changed, 112 insertions, 61 deletions
@@ -35,9 +35,24 @@ #include "vu_common.h" #include <time.h> -static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM]; +static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE]; static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE]; -static int head[VIRTQUEUE_MAX_SIZE + 1]; + +/** + * struct vu_frame - Descriptor for a TCP frame mapped to virtqueue elements + * @idx_element: Index of first element in elem[] for this frame + * @num_element: Number of virtqueue elements used by this frame + * @idx_iovec: Index of first iovec in iov_vu[] for this frame + * @num_iovec: Number of iovecs covering this frame's buffers + * @size: Total frame size including all headers + */ +static struct vu_frame { + int idx_element; + int num_element; + int idx_iovec; + int num_iovec; + size_t size; +} frame[VIRTQUEUE_MAX_SIZE]; /** * tcp_vu_hdrlen() - Sum size of all headers, from TCP to virtio-net @@ -176,8 +191,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) * @v6: Set for IPv6 connections * @already_sent: Number of bytes already sent * @fillsize: Maximum bytes to fill in guest-side receiving window - * @iov_cnt: number of iov (output) - * @head_cnt: Pointer to store the count of head iov entries (output) + * @elem_used: number of element (output) + * @frame_cnt: Pointer to store the number of frames (output) * * Return: number of bytes received from the socket, or a negative error code * on failure. @@ -185,57 +200,78 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, const struct tcp_tap_conn *conn, bool v6, uint32_t already_sent, size_t fillsize, - int *iov_cnt, int *head_cnt) + int *elem_used, int *frame_cnt) { + static struct iovec iov_msg[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM]; const struct vu_dev *vdev = c->vdev; struct msghdr mh_sock = { 0 }; uint16_t mss = MSS_GET(conn); size_t hdrlen, iov_used; int s = conn->sock; + ssize_t ret, dlen; int elem_cnt; - ssize_t ret; - int i; - - *iov_cnt = 0; + int i, j; hdrlen = tcp_vu_hdrlen(v6); + *elem_used = 0; + iov_used = 0; elem_cnt = 0; - *head_cnt = 0; + *frame_cnt = 0; while (fillsize > 0 && elem_cnt < ARRAY_SIZE(elem) && - iov_used < VIRTQUEUE_MAX_SIZE) { - size_t frame_size, dlen, in_total; - struct iovec *iov; + iov_used < ARRAY_SIZE(iov_vu) && + *frame_cnt < ARRAY_SIZE(frame)) { + size_t frame_size, in_total; int cnt; cnt = vu_collect(vdev, vq, &elem[elem_cnt], ARRAY_SIZE(elem) - elem_cnt, - &iov_vu[DISCARD_IOV_NUM + iov_used], - VIRTQUEUE_MAX_SIZE - iov_used, &in_total, + &iov_vu[iov_used], + ARRAY_SIZE(iov_vu) - iov_used, &in_total, MIN(mss, fillsize) + hdrlen, &frame_size); if (cnt == 0) break; - assert((size_t)cnt == in_total); /* one iovec per element */ + + frame[*frame_cnt].idx_element = elem_cnt; + frame[*frame_cnt].num_element = cnt; + frame[*frame_cnt].idx_iovec = iov_used; + frame[*frame_cnt].num_iovec = in_total; + frame[*frame_cnt].size = frame_size; + (*frame_cnt)++; iov_used += in_total; - dlen = frame_size - hdrlen; + elem_cnt += cnt; - /* reserve space for headers in iov */ - iov = &elem[elem_cnt].in_sg[0]; - assert(iov->iov_len >= hdrlen); - iov->iov_base = (char *)iov->iov_base + hdrlen; - iov->iov_len -= hdrlen; - head[(*head_cnt)++] = elem_cnt; + fillsize -= frame_size - hdrlen; + } - fillsize -= dlen; - elem_cnt += cnt; + /* build an iov array without headers */ + for (i = 0, j = DISCARD_IOV_NUM; i < *frame_cnt && + j < ARRAY_SIZE(iov_msg); i++) { + struct iov_tail data; + ssize_t cnt; + + data = IOV_TAIL(&iov_vu[frame[i].idx_iovec], + frame[i].num_iovec, 0); + iov_drop_header(&data, hdrlen); + + cnt = iov_tail_clone(&iov_msg[j], ARRAY_SIZE(iov_msg) - j, + &data); + assert(cnt < ARRAY_SIZE(iov_msg) - j); + if (cnt < 0) + die("Missing entries in iov_msg"); + + j += cnt; } - if (tcp_prepare_iov(&mh_sock, iov_vu, already_sent, elem_cnt)) + if (tcp_prepare_iov(&mh_sock, iov_msg, already_sent, + j - DISCARD_IOV_NUM)) { /* Expect caller to do a TCP reset */ + vu_queue_rewind(vq, elem_cnt); return -1; + } do ret = recvmsg(s, &mh_sock, MSG_PEEK); @@ -249,32 +285,49 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, if (!peek_offset_cap) ret -= already_sent; - i = iov_skip_bytes(&iov_vu[DISCARD_IOV_NUM], iov_used, - MAX(hdrlen + ret, VNET_HLEN + ETH_ZLEN), - NULL); - if ((size_t)i < iov_used) - i++; + dlen = ret; - /* adjust head count */ - while (*head_cnt > 0 && head[*head_cnt - 1] >= i) - (*head_cnt)--; + /* truncate frame */ + for (i = 0; i < *frame_cnt; i++) { + struct vu_frame *f = &frame[i]; - /* mark end of array */ - head[*head_cnt] = i; - *iov_cnt = i; + if ((size_t)ret <= f->size - hdrlen) { + unsigned cnt; - /* release unused buffers */ - vu_queue_rewind(vq, elem_cnt - i); + cnt = iov_skip_bytes(&iov_vu[f->idx_iovec], f->num_iovec, + MAX(hdrlen + ret, VNET_HLEN + ETH_ZLEN), + NULL); + if (cnt < (unsigned)f->num_iovec) + cnt++; + + f->size = ret + hdrlen; + f->num_iovec = cnt; - /* restore space for headers in iov */ - for (i = 0; i < *head_cnt; i++) { - struct iovec *iov = &elem[head[i]].in_sg[0]; + for (j = 0; j < f->num_element; j++) { + struct vu_virtq_element *e; - iov->iov_base = (char *)iov->iov_base - hdrlen; - iov->iov_len += hdrlen; + e = &elem[f->idx_element + j]; + if (cnt <= e->in_num) { + e->in_num = cnt; + j++; + break; + } + cnt -= e->in_num; + } + f->num_element = j; + *elem_used += j; + i++; + break; + } + *elem_used += f->num_element; + ret -= f->size - hdrlen; } + *frame_cnt = i; - return ret; + /* release unused buffers */ + vu_queue_rewind(vq, elem_cnt - *elem_used); + + return dlen; } /** @@ -350,7 +403,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; uint32_t already_sent, check; ssize_t len, previous_dlen; - int i, iov_cnt, head_cnt; + int i, elem_cnt, frame_cnt; size_t hdrlen, fillsize; int v6 = CONN_V6(conn); @@ -388,7 +441,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) * data from the socket */ len = tcp_vu_sock_recv(c, vq, conn, v6, already_sent, fillsize, - &iov_cnt, &head_cnt); + &elem_cnt, &frame_cnt); if (len < 0) { if (len != -EAGAIN && len != -EWOULDBLOCK) { tcp_rst(c, conn); @@ -402,6 +455,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) } if (!len) { + vu_queue_rewind(vq, elem_cnt); if (already_sent) { conn_flag(c, conn, STALLED); } else if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == @@ -442,34 +496,31 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) check = IP4_CSUM; if (*c->pcap) check |= TCP_CSUM; - for (i = 0, previous_dlen = -1; i < head_cnt; i++) { - struct iovec *iov = &elem[head[i]].in_sg[0]; - int buf_cnt = head[i + 1] - head[i]; - size_t frame_size = iov_size(iov, buf_cnt); - bool push = i == head_cnt - 1; + for (i = 0, previous_dlen = -1; i < frame_cnt; i++) { + struct iovec *iov = &iov_vu[frame[i].idx_iovec]; + int iov_cnt = frame[i].num_iovec; + bool push = i == frame_cnt - 1; ssize_t dlen; - assert(frame_size >= hdrlen); + assert(frame[i].size >= hdrlen); - dlen = frame_size - hdrlen; - if (dlen > len) - dlen = len; - len -= dlen; + dlen = frame[i].size - hdrlen; /* The IPv4 header checksum varies only with dlen */ if (previous_dlen != dlen) check |= IP4_CSUM; previous_dlen = dlen; - tcp_vu_prepare(c, conn, iov, buf_cnt, dlen, &check, push); + tcp_vu_prepare(c, conn, iov, iov_cnt, dlen, &check, push); - vu_pad(elem[head[i]].in_sg, buf_cnt, dlen + hdrlen); - vu_flush(vdev, vq, &elem[head[i]], buf_cnt, dlen + hdrlen); + vu_pad(iov, iov_cnt, dlen + hdrlen); if (*c->pcap) { - pcap_iov(iov, buf_cnt, VNET_HLEN, + pcap_iov(iov, iov_cnt, VNET_HLEN, dlen + hdrlen - VNET_HLEN); } + vu_flush(vdev, vq, &elem[frame[i].idx_element], + frame[i].num_element, dlen + hdrlen); conn->seq_to_tap += dlen; } |
