path: root/tcp_vu.c
Diffstat (limited to 'tcp_vu.c')
 tcp_vu.c | 650 +++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 342 insertions(+), 308 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index b0a3993..bb7d42a 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -1,14 +1,19 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+/* tcp_vu.c - TCP L2 vhost-user management functions
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <netinet/ip.h>
+#include <netinet/tcp.h>
#include <sys/socket.h>
-#include <linux/tcp.h>
#include <linux/virtio_net.h>
#include "util.h"
@@ -23,177 +28,340 @@
#include "tcp_conn.h"
#include "flow_table.h"
#include "tcp_vu.h"
+#include "tap.h"
#include "tcp_internal.h"
#include "checksum.h"
+#include "vu_common.h"
+#include <time.h>
+
+static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
+static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
+
+/**
+ * tcp_vu_hdrlen() - Return the size of the headers in a level 2 frame (TCP)
+ * @v6: Set for IPv6 packet
+ *
+ * Return: Size of the headers, in bytes
+ */
+static size_t tcp_vu_hdrlen(bool v6)
+{
+ size_t hdrlen;
-#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr))
-#define CONN_V6(conn) (!CONN_V4(conn))
+ hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
+ sizeof(struct ethhdr) + sizeof(struct tcphdr);
-/* vhost-user */
-static const struct virtio_net_hdr vu_header = {
- .flags = VIRTIO_NET_HDR_F_DATA_VALID,
- .gso_type = VIRTIO_NET_HDR_GSO_NONE,
-};
+ if (v6)
+ hdrlen += sizeof(struct ipv6hdr);
+ else
+ hdrlen += sizeof(struct iphdr);
-static unsigned char buffer[65536];
-static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
-static unsigned int indexes [VIRTQUEUE_MAX_SIZE];
+ return hdrlen;
+}
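For reference, with the usual Linux struct sizes (virtio_net_hdr_mrg_rxbuf is 12 bytes, ethhdr 14, tcphdr 20, iphdr 20, ipv6hdr 40), tcp_vu_hdrlen() works out to 66 bytes for IPv4 and 86 bytes for IPv6. A minimal standalone check of that arithmetic, using the glibc equivalents of the structs above:

#include <stdio.h>
#include <net/ethernet.h>	/* struct ether_header: 14 bytes */
#include <netinet/ip.h>		/* struct iphdr: 20 bytes */
#include <netinet/ip6.h>	/* struct ip6_hdr: 40 bytes */
#include <netinet/tcp.h>	/* struct tcphdr: 20 bytes */
#include <linux/virtio_net.h>	/* struct virtio_net_hdr_mrg_rxbuf: 12 bytes */

int main(void)
{
	size_t base = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
		      sizeof(struct ether_header) + sizeof(struct tcphdr);

	printf("IPv4 hdrlen: %zu\n", base + sizeof(struct iphdr));	/* 66 */
	printf("IPv6 hdrlen: %zu\n", base + sizeof(struct ip6_hdr));	/* 86 */
	return 0;
}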
-uint16_t tcp_vu_conn_tap_mss(const struct tcp_tap_conn *conn)
+/**
+ * tcp_vu_update_check() - Calculate TCP checksum
+ * @tapside: Address information for one side of the flow
+ * @iov: Pointer to the array of IO vectors
+ * @iov_used: Length of the array
+ */
+static void tcp_vu_update_check(const struct flowside *tapside,
+ struct iovec *iov, int iov_used)
{
- (void)conn;
- return USHRT_MAX;
+ char *base = iov[0].iov_base;
+
+ if (inany_v4(&tapside->oaddr)) {
+ const struct iphdr *iph = vu_ip(base);
+
+ tcp_update_check_tcp4(iph, iov, iov_used,
+ (char *)vu_payloadv4(base) - base);
+ } else {
+ const struct ipv6hdr *ip6h = vu_ip(base);
+
+ tcp_update_check_tcp6(ip6h, iov, iov_used,
+ (char *)vu_payloadv6(base) - base);
+ }
}
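tcp_update_check_tcp4() and tcp_update_check_tcp6() have to checksum a TCP segment scattered across guest buffers. As a rough sketch of that kind of fold (illustration only, not passt's implementation: the real helpers also fold in the IPv4/IPv6 pseudo-header sum, which is what the iph/ip6h argument is for), an RFC 1071 Internet checksum over an iovec array could look like:

#include <stdint.h>
#include <sys/uio.h>

static uint16_t csum_iov_sketch(const struct iovec *iov, int n, uint32_t init)
{
	uint32_t sum = init;
	int odd = 0;	/* pairing state carried across vector boundaries */

	for (int i = 0; i < n; i++) {
		const uint8_t *p = iov[i].iov_base;
		size_t len = iov[i].iov_len;

		while (len--) {
			/* even offsets are the high byte of each 16-bit word */
			sum += odd ? *p++ : (uint32_t)*p++ << 8;
			odd = !odd;
		}
	}
	while (sum >> 16)	/* fold the carries back into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}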
-int tcp_vu_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+/**
+ * tcp_vu_send_flag() - Send segment with flags to vhost-user (no payload)
+ * @c: Execution context
+ * @conn: Connection pointer
+ * @flags: TCP flags: if not set, send segment only if ACK is due
+ *
+ * Return: negative error code on connection reset, 0 otherwise
+ */
+int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
- VuDev *vdev = (VuDev *)&c->vdev;
- VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
- size_t tlen, vnet_hdrlen, ip_len, optlen = 0;
- struct virtio_net_hdr_mrg_rxbuf *vh;
- VuVirtqElement *elem;
+ struct vu_dev *vdev = c->vdev;
+ struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ const struct flowside *tapside = TAPFLOW(conn);
+ size_t optlen, hdrlen;
+ struct vu_virtq_element flags_elem[2];
+ struct tcp_payload_t *payload;
+ struct ipv6hdr *ip6h = NULL;
+ struct iovec flags_iov[2];
+ struct iphdr *iph = NULL;
struct ethhdr *eh;
+ uint32_t seq;
+ int elem_cnt;
int nb_ack;
int ret;
- elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
- if (!elem)
- return 0;
+ hdrlen = tcp_vu_hdrlen(CONN_V6(conn));
- if (elem->in_num < 1) {
- err("virtio-net receive queue contains no in buffers");
- vu_queue_rewind(vdev, vq, 1);
- return 0;
- }
+ vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
+
+ elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
+ hdrlen + sizeof(struct tcp_syn_opts), NULL);
+ if (elem_cnt != 1)
+ return -1;
+
+ vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);
+
+ eh = vu_eth(flags_elem[0].in_sg[0].iov_base);
+
+ memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
+ memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
- vh = elem->in_sg[0].iov_base;
+ if (CONN_V4(conn)) {
+ eh->h_proto = htons(ETH_P_IP);
+
+ iph = vu_ip(flags_elem[0].in_sg[0].iov_base);
+ *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
- vh->hdr = vu_header;
- if (vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
- vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- vh->num_buffers = htole16(1);
+ payload = vu_payloadv4(flags_elem[0].in_sg[0].iov_base);
} else {
- vnet_hdrlen = sizeof(struct virtio_net_hdr);
+ eh->h_proto = htons(ETH_P_IPV6);
+
+ ip6h = vu_ip(flags_elem[0].in_sg[0].iov_base);
+ *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
+ payload = vu_payloadv6(flags_elem[0].in_sg[0].iov_base);
+ }
+
+ memset(&payload->th, 0, sizeof(payload->th));
+ payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
+ payload->th.ack = 1;
+
+ seq = conn->seq_to_tap;
+ ret = tcp_prepare_flags(c, conn, flags, &payload->th,
+ (struct tcp_syn_opts *)payload->data,
+ &optlen);
+ if (ret <= 0) {
+ vu_queue_rewind(vq, 1);
+ return ret;
}
- eh = (struct ethhdr *)((char *)elem->in_sg[0].iov_base + vnet_hdrlen);
- memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
- memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
+ flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
if (CONN_V4(conn)) {
- struct iphdr *iph = (struct iphdr *)(eh + 1);
- struct tcphdr *th = (struct tcphdr *)(iph + 1);
- char *data = (char *)(th + 1);
+ tcp_fill_headers4(conn, NULL, iph, payload, optlen, NULL, seq,
+ true);
+ } else {
+ tcp_fill_headers6(conn, NULL, ip6h, payload, optlen, seq, true);
+ }
- eh->h_proto = htons(ETH_P_IP);
+ if (*c->pcap) {
+ tcp_vu_update_check(tapside, &flags_elem[0].in_sg[0], 1);
+ pcap_iov(&flags_elem[0].in_sg[0], 1,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf));
+ }
+ nb_ack = 1;
- *th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
+ if (flags & DUP_ACK) {
+ vu_set_element(&flags_elem[1], NULL, &flags_iov[1]);
+
+ elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
+ flags_elem[0].in_sg[0].iov_len, NULL);
+ if (elem_cnt == 1) {
+ memcpy(flags_elem[1].in_sg[0].iov_base,
+ flags_elem[0].in_sg[0].iov_base,
+ flags_elem[0].in_sg[0].iov_len);
+ nb_ack++;
+
+ if (*c->pcap)
+ pcap_iov(&flags_elem[1].in_sg[0], 1, 0);
+ }
+ }
- *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
+ vu_flush(vdev, vq, flags_elem, nb_ack);
- ret = tcp_fill_flag_header(c, conn, flags, th, data, &optlen);
- if (ret <= 0) {
- vu_queue_rewind(vdev, vq, 1);
- return ret;
- }
+ return 0;
+}
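A hypothetical caller sketch (the dispatch from the generic TCP code is not part of this diff): flags can carry explicit TCP flags, or DUP_ACK to request the duplicated ACK frame handled above:

	/* illustration only */
	if (tcp_vu_send_flag(c, conn, ACK))
		tcp_rst(c, conn);

	/* send the same ACK twice so the guest sees a duplicate ACK */
	tcp_vu_send_flag(c, conn, DUP_ACK);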
- ip_len = tcp_fill_headers4(c, conn, iph,
- (struct tcphdr *)(iph + 1), optlen,
- NULL, conn->seq_to_tap);
+/**
+ * tcp_vu_sock_recv() - Receive data stream from socket into vhost-user buffers
+ * @c: Execution context
+ * @conn: Connection pointer
+ * @v6: Set for IPv6 connections
+ * @already_sent: Number of bytes already sent
+ * @fillsize: Number of bytes we can receive
+ * @iov_cnt: Number of iov entries used to store the data (output)
+ *
+ * Return: Number of bytes received from the socket, negative value on error
+ */
+static ssize_t tcp_vu_sock_recv(const struct ctx *c,
+ const struct tcp_tap_conn *conn, bool v6,
+ uint32_t already_sent, size_t fillsize,
+ int *iov_cnt)
+{
+ struct vu_dev *vdev = c->vdev;
+ struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ struct msghdr mh_sock = { 0 };
+ uint16_t mss = MSS_GET(conn);
+ int s = conn->sock;
+ size_t hdrlen;
+ int elem_cnt;
+ ssize_t ret;
- tlen = ip_len + sizeof(struct ethhdr);
+ *iov_cnt = 0;
- if (*c->pcap) {
- uint32_t sum = proto_ipv4_header_psum(iph->tot_len,
- IPPROTO_TCP,
- (struct in_addr){ .s_addr = iph->saddr },
- (struct in_addr){ .s_addr = iph->daddr });
+ hdrlen = tcp_vu_hdrlen(v6);
- th->check = csum(th, optlen + sizeof(struct tcphdr), sum);
- }
- } else {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
- struct tcphdr *th = (struct tcphdr *)(ip6h + 1);
- char *data = (char *)(th + 1);
+ vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);
- eh->h_proto = htons(ETH_P_IPV6);
+ elem_cnt = 0;
- *th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
+ while (fillsize > 0 && elem_cnt < VIRTQUEUE_MAX_SIZE) {
+ struct iovec *iov;
+ size_t frame_size;
+ int cnt;
- *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
+ if (mss > fillsize)
+ mss = fillsize;
- ret = tcp_fill_flag_header(c, conn, flags, th, data, &optlen);
- if (ret <= 0) {
- vu_queue_rewind(vdev, vq, 1);
- return ret;
- }
+ cnt = vu_collect(vdev, vq, &elem[elem_cnt],
+ VIRTQUEUE_MAX_SIZE - elem_cnt,
+ mss + hdrlen, &frame_size);
+ if (cnt == 0)
+ break;
- ip_len = tcp_fill_headers6(c, conn, ip6h,
- (struct tcphdr *)(ip6h + 1),
- optlen, conn->seq_to_tap);
+ frame_size -= hdrlen;
+ iov = &elem[elem_cnt].in_sg[0];
+ iov->iov_base = (char *)iov->iov_base + hdrlen;
+ iov->iov_len -= hdrlen;
- tlen = ip_len + sizeof(struct ethhdr);
+ fillsize -= frame_size;
+ elem_cnt += cnt;
- if (*c->pcap) {
- uint32_t sum = proto_ipv6_header_psum(ip6h->payload_len,
- IPPROTO_TCP,
- &ip6h->saddr,
- &ip6h->daddr);
+ /* All the frames must have the same size (except the last one),
+ * otherwise we will not be able to scan the iov array
+ * to find the iov entries with headers
+ * (headers are spread every frame_size in the array)
+ */
+ if (frame_size < mss)
+ break;
+ }
- th->check = csum(th, optlen + sizeof(struct tcphdr), sum);
- }
+ if (peek_offset_cap) {
+ mh_sock.msg_iov = iov_vu + 1;
+ mh_sock.msg_iovlen = elem_cnt;
+ } else {
+ iov_vu[0].iov_base = tcp_buf_discard;
+ iov_vu[0].iov_len = already_sent;
+
+ mh_sock.msg_iov = iov_vu;
+ mh_sock.msg_iovlen = elem_cnt + 1;
}
- pcap((void *)eh, tlen);
+ do
+ ret = recvmsg(s, &mh_sock, MSG_PEEK);
+ while (ret < 0 && errno == EINTR);
- tlen += vnet_hdrlen;
- vu_queue_fill(vdev, vq, elem, tlen, 0);
- nb_ack = 1;
+ *iov_cnt = elem_cnt;
- if (flags & DUP_ACK) {
- elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
- if (elem) {
- if (elem->in_num < 1 || elem->in_sg[0].iov_len < tlen) {
- vu_queue_rewind(vdev, vq, 1);
- } else {
- memcpy(elem->in_sg[0].iov_base, vh, tlen);
- nb_ack++;
- }
- }
+ return ret;
+}
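The peek_offset_cap branch above deserves a note: with SO_PEEK_OFF available, the kernel itself skips the already_sent bytes still held in the receive queue, so the guest buffers can be peeked into directly. Without it, the already-sent bytes must be re-read into a scratch buffer and dropped. A self-contained sketch of that fallback (names and helper are hypothetical, not passt's):

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Peek only data not yet sent to the guest, without SO_PEEK_OFF, by
 * prepending a discard vector swallowing the first already_sent bytes.
 */
static ssize_t peek_new_data(int s, char *discard, size_t already_sent,
			     const struct iovec *data_iov, int data_cnt)
{
	struct iovec iov[1 + data_cnt];
	struct msghdr mh = { 0 };
	ssize_t n;

	iov[0].iov_base = discard;	/* already-sent bytes, re-read and ignored */
	iov[0].iov_len = already_sent;
	memcpy(&iov[1], data_iov, sizeof(*data_iov) * data_cnt);

	mh.msg_iov = iov;
	mh.msg_iovlen = 1 + data_cnt;

	do
		n = recvmsg(s, &mh, MSG_PEEK);
	while (n < 0 && errno == EINTR);

	if (n < 0)
		return n;	/* error, errno set by recvmsg() */

	/* new bytes only; 0 if nothing beyond already_sent arrived yet */
	return n > (ssize_t)already_sent ? n - (ssize_t)already_sent : 0;
}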
+
+/**
+ * tcp_vu_prepare() - Prepare the frame header
+ * @c: Execution context
+ * @conn: Connection pointer
+ * @first: Pointer to the array of IO vectors
+ * @dlen: Packet data length
+ * @check: Pointer to the IPv4 checksum, if already known (updated on return)
+ */
+static void tcp_vu_prepare(const struct ctx *c,
+ struct tcp_tap_conn *conn, struct iovec *first,
+ size_t dlen, const uint16_t **check)
+{
+ const struct flowside *toside = TAPFLOW(conn);
+ struct tcp_payload_t *payload;
+ char *base = first->iov_base;
+ struct ipv6hdr *ip6h = NULL;
+ struct iphdr *iph = NULL;
+ struct ethhdr *eh;
+
+ /* We assume the first iovec provided by the guest is large enough to
+ * hold all the headers of the L2 frame
+ */
+
+ eh = vu_eth(base);
+
+ memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
+ memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
+
+ /* initialize header */
+
+ if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
+ ASSERT(first[0].iov_len >= tcp_vu_hdrlen(false));
+
+ eh->h_proto = htons(ETH_P_IP);
+
+ iph = vu_ip(base);
+ *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
+ payload = vu_payloadv4(base);
+ } else {
+ ASSERT(first[0].iov_len >= tcp_vu_hdrlen(true));
+
+ eh->h_proto = htons(ETH_P_IPV6);
+
+ ip6h = vu_ip(base);
+ *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
+
+ payload = vu_payloadv6(base);
}
- vu_queue_flush(vdev, vq, nb_ack);
- vu_queue_notify(vdev, vq);
+ memset(&payload->th, 0, sizeof(payload->th));
+ payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
+ payload->th.ack = 1;
- return 0;
+ if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
+ tcp_fill_headers4(conn, NULL, iph, payload, dlen,
+ *check, conn->seq_to_tap, true);
+ *check = &iph->check;
+ } else {
+ tcp_fill_headers6(conn, NULL, ip6h, payload, dlen,
+ conn->seq_to_tap, true);
+ }
}
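The layout tcp_vu_prepare() writes into the first descriptor of a frame, shown for the IPv4 case as a hypothetical packed struct (passt reaches these fields through the vu_eth()/vu_ip()/vu_payloadv4() offset helpers rather than defining such a type):

#include <linux/virtio_net.h>
#include <linux/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

/* illustration only: field order in the first in_sg buffer (IPv4) */
struct tcp_vu_frame4 {
	struct virtio_net_hdr_mrg_rxbuf vnet;	/* filled by vu_set_vnethdr() */
	struct ethhdr eth;			/* MACs and h_proto set above */
	struct iphdr ip;			/* L2_BUF_IP4_INIT(IPPROTO_TCP) */
	struct tcphdr th;			/* doff/ack, then tcp_fill_headers4() */
	/* TCP payload follows, possibly spilling into further descriptors */
} __attribute__((packed));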
-int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+/**
+ * tcp_vu_data_from_sock() - Handle new data from socket, queue to vhost-user,
+ * in window
+ * @c: Execution context
+ * @conn: Connection pointer
+ *
+ * Return: Negative on connection reset, 0 otherwise
+ */
+int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
- uint32_t already_sent;
- VuDev *vdev = (VuDev *)&c->vdev;
- VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
- int s = conn->sock, v4 = CONN_V4(conn);
- int i, ret = 0, iov_count, iov_used;
- struct msghdr mh_sock = { 0 };
- size_t l2_hdrlen, vnet_hdrlen, fillsize;
- ssize_t len;
- uint16_t *check;
+ struct vu_dev *vdev = c->vdev;
+ struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ const struct flowside *tapside = TAPFLOW(conn);
uint16_t mss = MSS_GET(conn);
- int num_buffers;
- int segment_size;
+ size_t hdrlen, fillsize;
+ int i, iov_cnt, iov_used;
+ int v6 = CONN_V6(conn);
+ uint32_t already_sent = 0;
+ const uint16_t *check;
struct iovec *first;
- bool has_mrg_rxbuf;
+ int frame_size;
+ int num_buffers;
+ ssize_t len;
if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
- err("Got packet, but no available descriptors on RX virtq.");
+ flow_err(conn,
+ "Got packet, but RX virtqueue not usable yet");
return 0;
}
@@ -205,6 +373,10 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
conn->seq_ack_from_tap, conn->seq_to_tap);
conn->seq_to_tap = conn->seq_ack_from_tap;
already_sent = 0;
+ if (tcp_set_peek_offset(conn->sock, 0)) {
+ tcp_rst(c, conn);
+ return -1;
+ }
}
if (!wnd_scaled || already_sent >= wnd_scaled) {
@@ -215,85 +387,26 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
/* Set up buffer descriptors we'll fill completely and partially. */
- fillsize = wnd_scaled;
-
- iov_vu[0].iov_base = tcp_buf_discard;
- iov_vu[0].iov_len = already_sent;
- fillsize -= already_sent;
-
- has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF);
- if (has_mrg_rxbuf) {
- vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- } else {
- vnet_hdrlen = sizeof(struct virtio_net_hdr);
- }
- l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct tcphdr);
- if (v4) {
- l2_hdrlen += sizeof(struct iphdr);
- } else {
- l2_hdrlen += sizeof(struct ipv6hdr);
- }
-
- iov_count = 0;
- segment_size = 0;
- while (fillsize > 0 && iov_count < VIRTQUEUE_MAX_SIZE - 1) {
- VuVirtqElement *elem;
-
- elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
- if (!elem)
- break;
-
- if (elem->in_num < 1) {
- err("virtio-net receive queue contains no in buffers");
- goto err;
- }
-
- ASSERT(elem->in_num == 1);
- ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen);
-
- indexes[iov_count] = elem->index;
-
- if (segment_size == 0) {
- iov_vu[iov_count + 1].iov_base =
- (char *)elem->in_sg[0].iov_base + l2_hdrlen;
- iov_vu[iov_count + 1].iov_len =
- elem->in_sg[0].iov_len - l2_hdrlen;
- } else {
- iov_vu[iov_count + 1].iov_base = elem->in_sg[0].iov_base;
- iov_vu[iov_count + 1].iov_len = elem->in_sg[0].iov_len;
+ fillsize = wnd_scaled - already_sent;
+
+ /* collect the buffers from vhost-user and fill them with the
+ * data from the socket
+ */
+ len = tcp_vu_sock_recv(c, conn, v6, already_sent, fillsize, &iov_cnt);
+ if (len < 0) {
+ vu_queue_rewind(vq, iov_cnt);
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
+ tcp_rst(c, conn);
+ return -errno;
}
-
- if (iov_vu[iov_count + 1].iov_len > fillsize)
- iov_vu[iov_count + 1].iov_len = fillsize;
-
- segment_size += iov_vu[iov_count + 1].iov_len;
- if (!has_mrg_rxbuf) {
- segment_size = 0;
- } else if (segment_size >= mss) {
- iov_vu[iov_count + 1].iov_len -= segment_size - mss;
- segment_size = 0;
- }
- fillsize -= iov_vu[iov_count + 1].iov_len;
-
- iov_count++;
- }
- if (iov_count == 0)
return 0;
-
- mh_sock.msg_iov = iov_vu;
- mh_sock.msg_iovlen = iov_count + 1;
-
- do
- len = recvmsg(s, &mh_sock, MSG_PEEK);
- while (len < 0 && errno == EINTR);
-
- if (len < 0)
- goto err;
+ }
if (!len) {
- vu_queue_rewind(vdev, vq, iov_count);
+ vu_queue_rewind(vq, iov_cnt);
if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) {
- if ((ret = tcp_vu_send_flag(c, conn, FIN | ACK))) {
+ int ret = tcp_vu_send_flag(c, conn, FIN | ACK);
+ if (ret) {
tcp_rst(c, conn);
return ret;
}
@@ -304,26 +417,36 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
return 0;
}
- len -= already_sent;
+ if (!peek_offset_cap)
+ len -= already_sent;
+
if (len <= 0) {
+ vu_queue_rewind(vq, iov_cnt);
conn_flag(c, conn, STALLED);
- vu_queue_rewind(vdev, vq, iov_count);
return 0;
}
conn_flag(c, conn, ~STALLED);
/* Likely, some new data was acked too. */
- tcp_update_seqack_wnd(c, conn, 0, NULL);
+ tcp_update_seqack_wnd(c, conn, false, NULL);
/* initialize headers */
+ hdrlen = tcp_vu_hdrlen(v6);
iov_used = 0;
num_buffers = 0;
check = NULL;
- segment_size = 0;
- for (i = 0; i < iov_count && len; i++) {
+ frame_size = 0;
- if (segment_size == 0)
+ /* iov_vu is an array of buffers, and a single buffer can be smaller
+ * than the frame size we want to use. With num_buffers we can merge
+ * several virtio iov buffers into one packet: we only need to set
+ * the packet headers in the first iov, and num_buffers to the number
+ * of iov entries the frame spans.
+ */
+ for (i = 0; i < iov_cnt && len; i++) {
+ if (frame_size == 0)
first = &iov_vu[i + 1];
if (iov_vu[i + 1].iov_len > (size_t)len)
@@ -332,129 +455,40 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
len -= iov_vu[i + 1].iov_len;
iov_used++;
- segment_size += iov_vu[i + 1].iov_len;
+ frame_size += iov_vu[i + 1].iov_len;
num_buffers++;
- if (segment_size >= mss || len == 0 ||
- i + 1 == iov_count || !has_mrg_rxbuf) {
-
- struct ethhdr *eh;
- struct virtio_net_hdr_mrg_rxbuf *vh;
- char *base = (char *)first->iov_base - l2_hdrlen;
- size_t size = first->iov_len + l2_hdrlen;
-
- vh = (struct virtio_net_hdr_mrg_rxbuf *)base;
-
- vh->hdr = vu_header;
- if (has_mrg_rxbuf)
- vh->num_buffers = htole16(num_buffers);
-
- eh = (struct ethhdr *)((char *)base + vnet_hdrlen);
-
- memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
- memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
-
- /* initialize header */
- if (v4) {
- struct iphdr *iph = (struct iphdr *)(eh + 1);
- struct tcphdr *th = (struct tcphdr *)(iph + 1);
-
- eh->h_proto = htons(ETH_P_IP);
-
- *th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
-
- *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
-
- tcp_fill_headers4(c, conn, iph,
- (struct tcphdr *)(iph + 1),
- segment_size, len ? check : NULL,
- conn->seq_to_tap);
-
- if (*c->pcap) {
- uint32_t sum = proto_ipv4_header_psum(iph->tot_len,
- IPPROTO_TCP,
- (struct in_addr){ .s_addr = iph->saddr },
- (struct in_addr){ .s_addr = iph->daddr });
-
- first->iov_base = th;
- first->iov_len = size - l2_hdrlen + sizeof(*th);
-
- th->check = csum_iov(first, num_buffers, sum);
- }
-
- check = &iph->check;
- } else {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
- struct tcphdr *th = (struct tcphdr *)(ip6h + 1);
-
- eh->h_proto = htons(ETH_P_IPV6);
-
- *th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
-
- *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
-
- tcp_fill_headers6(c, conn, ip6h,
- (struct tcphdr *)(ip6h + 1),
- segment_size, conn->seq_to_tap);
- if (*c->pcap) {
- uint32_t sum = proto_ipv6_header_psum(ip6h->payload_len,
- IPPROTO_TCP,
- &ip6h->saddr,
- &ip6h->daddr);
-
- first->iov_base = th;
- first->iov_len = size - l2_hdrlen + sizeof(*th);
-
- th->check = csum_iov(first, num_buffers, sum);
- }
+ if (frame_size >= mss || len == 0 ||
+ i + 1 == iov_cnt || !vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
+ if (i + 1 == iov_cnt)
+ check = NULL;
+
+ /* restore first iovec base: point to vnet header */
+ first->iov_base = (char *)first->iov_base - hdrlen;
+ first->iov_len += hdrlen;
+ vu_set_vnethdr(vdev, first->iov_base, num_buffers);
+
+ tcp_vu_prepare(c, conn, first, frame_size, &check);
+ if (*c->pcap) {
+ tcp_vu_update_check(tapside, first, num_buffers);
+ pcap_iov(first, num_buffers,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf));
}
- /* set iov for pcap logging */
- first->iov_base = eh;
- first->iov_len = size - vnet_hdrlen;
-
- pcap_iov(first, num_buffers);
-
- /* set iov_len for vu_queue_fill_by_index(); */
+ conn->seq_to_tap += frame_size;
- first->iov_base = base;
- first->iov_len = size;
-
- conn->seq_to_tap += segment_size;
-
- segment_size = 0;
+ frame_size = 0;
num_buffers = 0;
}
}
/* release unused buffers */
- vu_queue_rewind(vdev, vq, iov_count - iov_used);
+ vu_queue_rewind(vq, iov_cnt - iov_used);
/* send packets */
- for (i = 0; i < iov_used; i++) {
- vu_queue_fill_by_index(vdev, vq, indexes[i],
- iov_vu[i + 1].iov_len, i);
- }
-
- vu_queue_flush(vdev, vq, iov_used);
- vu_queue_notify(vdev, vq);
+ vu_flush(vdev, vq, elem, iov_used);
conn_flag(c, conn, ACK_FROM_TAP_DUE);
return 0;
-err:
- vu_queue_rewind(vdev, vq, iov_count);
-
- if (errno != EAGAIN && errno != EWOULDBLOCK) {
- ret = -errno;
- tcp_rst(c, conn);
- }
-
- return ret;
}
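To make the mergeable-buffers arithmetic concrete, a worked example with assumed figures: the guest posts 1024-byte buffers and MSS_GET(conn) is 1460, so IPv4 headers take 66 bytes (see tcp_vu_hdrlen() above):

	descriptor 0: 12-byte vnet header + 54 bytes of Ethernet/IPv4/TCP
	              headers + 958 bytes of payload    (1024 bytes total)
	descriptor 1: remaining 502 bytes of payload    (1460 - 958)

vu_set_vnethdr() stores num_buffers = 2 in the first descriptor's virtio_net_hdr_mrg_rxbuf, and a guest that negotiated VIRTIO_NET_F_MRG_RXBUF merges both descriptors back into one 1460-byte TCP segment. Without that feature each frame is limited to a single descriptor, which is why the loop above closes the frame unconditionally in that case.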