Diffstat (limited to 'tcp_vu.c')
-rw-r--r--	tcp_vu.c	650
1 file changed, 342 insertions(+), 308 deletions(-)
@@ -1,14 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
+/* tcp_vu.c - TCP L2 vhost-user management functions
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ */
 
 #include <errno.h>
 #include <stddef.h>
 #include <stdint.h>
 
 #include <netinet/ip.h>
+#include <netinet/tcp.h>
 
 #include <sys/socket.h>
 
-#include <linux/tcp.h>
 #include <linux/virtio_net.h>
 
 #include "util.h"
@@ -23,177 +28,340 @@
 #include "tcp_conn.h"
 #include "flow_table.h"
 #include "tcp_vu.h"
+#include "tap.h"
 #include "tcp_internal.h"
 #include "checksum.h"
+#include "vu_common.h"
+#include <time.h>
+
+static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
+static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
+
+/**
+ * tcp_vu_hdrlen() - Return the size of the headers in a level 2 frame (TCP)
+ * @v6:		Set for IPv6 packet
+ *
+ * Return: Size of the headers
+ */
+static size_t tcp_vu_hdrlen(bool v6)
+{
+	size_t hdrlen;
 
-#define CONN_V4(conn)		(!!inany_v4(&(conn)->faddr))
-#define CONN_V6(conn)		(!CONN_V4(conn))
+	hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
+		 sizeof(struct ethhdr) + sizeof(struct tcphdr);
 
-/* vhost-user */
-static const struct virtio_net_hdr vu_header = {
-	.flags = VIRTIO_NET_HDR_F_DATA_VALID,
-	.gso_type = VIRTIO_NET_HDR_GSO_NONE,
-};
+	if (v6)
+		hdrlen += sizeof(struct ipv6hdr);
+	else
+		hdrlen += sizeof(struct iphdr);
 
-static unsigned char buffer[65536];
-static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
-static unsigned int indexes [VIRTQUEUE_MAX_SIZE];
+	return hdrlen;
+}
 
-uint16_t tcp_vu_conn_tap_mss(const struct tcp_tap_conn *conn)
+/**
+ * tcp_vu_update_check() - Calculate TCP checksum
+ * @tapside:	Address information for one side of the flow
+ * @iov:	Pointer to the array of IO vectors
+ * @iov_used:	Length of the array
+ */
+static void tcp_vu_update_check(const struct flowside *tapside,
+				struct iovec *iov, int iov_used)
 {
-	(void)conn;
-	return USHRT_MAX;
+	char *base = iov[0].iov_base;
+
+	if (inany_v4(&tapside->oaddr)) {
+		const struct iphdr *iph = vu_ip(base);
+
+		tcp_update_check_tcp4(iph, iov, iov_used,
+				      (char *)vu_payloadv4(base) - base);
+	} else {
+		const struct ipv6hdr *ip6h = vu_ip(base);
+
+		tcp_update_check_tcp6(ip6h, iov, iov_used,
+				      (char *)vu_payloadv6(base) - base);
+	}
 }
 
-int tcp_vu_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+/**
+ * tcp_vu_send_flag() - Send segment with flags to vhost-user (no payload)
+ * @c:		Execution context
+ * @conn:	Connection pointer
+ * @flags:	TCP flags: if not set, send segment only if ACK is due
+ *
+ * Return: negative error code on connection reset, 0 otherwise
+ */
+int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 {
-	VuDev *vdev = (VuDev *)&c->vdev;
-	VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
-	size_t tlen, vnet_hdrlen, ip_len, optlen = 0;
-	struct virtio_net_hdr_mrg_rxbuf *vh;
-	VuVirtqElement *elem;
+	struct vu_dev *vdev = c->vdev;
+	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	const struct flowside *tapside = TAPFLOW(conn);
+	size_t optlen, hdrlen;
+	struct vu_virtq_element flags_elem[2];
+	struct tcp_payload_t *payload;
+	struct ipv6hdr *ip6h = NULL;
+	struct iovec flags_iov[2];
+	struct iphdr *iph = NULL;
 	struct ethhdr *eh;
+	uint32_t seq;
+	int elem_cnt;
 	int nb_ack;
 	int ret;
 
-	elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
-	if (!elem)
-		return 0;
+	hdrlen = tcp_vu_hdrlen(CONN_V6(conn));
 
-	if (elem->in_num < 1) {
-		err("virtio-net receive queue contains no in buffers");
-		vu_queue_rewind(vdev, vq, 1);
-		return 0;
-	}
+	vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
+
+	elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
+			      hdrlen + sizeof(struct tcp_syn_opts), NULL);
+	if (elem_cnt != 1)
+		return -1;
+
+	vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);
+
+	eh = vu_eth(flags_elem[0].in_sg[0].iov_base);
+
+	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
+	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
 
-	vh = elem->in_sg[0].iov_base;
+	if (CONN_V4(conn)) {
+		eh->h_proto = htons(ETH_P_IP);
+
+		iph = vu_ip(flags_elem[0].in_sg[0].iov_base);
+		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
 
-	vh->hdr = vu_header;
-	if (vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
-		vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-		vh->num_buffers = htole16(1);
+		payload = vu_payloadv4(flags_elem[0].in_sg[0].iov_base);
 	} else {
-		vnet_hdrlen = sizeof(struct virtio_net_hdr);
+		eh->h_proto = htons(ETH_P_IPV6);
+
+		ip6h = vu_ip(flags_elem[0].in_sg[0].iov_base);
+		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
+		payload = vu_payloadv6(flags_elem[0].in_sg[0].iov_base);
 	}
+
+	memset(&payload->th, 0, sizeof(payload->th));
+	payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
+	payload->th.ack = 1;
+
+	seq = conn->seq_to_tap;
+	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
+				(struct tcp_syn_opts *)payload->data,
+				&optlen);
+	if (ret <= 0) {
+		vu_queue_rewind(vq, 1);
+		return ret;
+	}
 
-	eh = (struct ethhdr *)((char *)elem->in_sg[0].iov_base + vnet_hdrlen);
-	memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
-	memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
+	flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
 
 	if (CONN_V4(conn)) {
-		struct iphdr *iph = (struct iphdr *)(eh + 1);
-		struct tcphdr *th = (struct tcphdr *)(iph + 1);
-		char *data = (char *)(th + 1);
+		tcp_fill_headers4(conn, NULL, iph, payload, optlen, NULL, seq,
+				  true);
+	} else {
+		tcp_fill_headers6(conn, NULL, ip6h, payload, optlen, seq, true);
+	}
 
-		eh->h_proto = htons(ETH_P_IP);
+	if (*c->pcap) {
+		tcp_vu_update_check(tapside, &flags_elem[0].in_sg[0], 1);
+		pcap_iov(&flags_elem[0].in_sg[0], 1,
+			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
+	}
+	nb_ack = 1;
 
-		*th = (struct tcphdr){
-			.doff = sizeof(struct tcphdr) / 4,
-			.ack = 1
-		};
+	if (flags & DUP_ACK) {
+		vu_set_element(&flags_elem[1], NULL, &flags_iov[1]);
+
+		elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
+				      flags_elem[0].in_sg[0].iov_len, NULL);
+		if (elem_cnt == 1) {
+			memcpy(flags_elem[1].in_sg[0].iov_base,
+			       flags_elem[0].in_sg[0].iov_base,
+			       flags_elem[0].in_sg[0].iov_len);
+			nb_ack++;
+
+			if (*c->pcap)
+				pcap_iov(&flags_elem[1].in_sg[0], 1, 0);
+		}
 	}
 
-		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
+	vu_flush(vdev, vq, flags_elem, nb_ack);
 
-		ret = tcp_fill_flag_header(c, conn, flags, th, data, &optlen);
-		if (ret <= 0) {
-			vu_queue_rewind(vdev, vq, 1);
-			return ret;
-		}
+	return 0;
+}
 
-		ip_len = tcp_fill_headers4(c, conn, iph,
-					   (struct tcphdr *)(iph + 1), optlen,
-					   NULL, conn->seq_to_tap);
+/**
+ * tcp_vu_sock_recv() - Receive datastream from socket into vhost-user buffers
+ * @c:			Execution context
+ * @conn:		Connection pointer
+ * @v6:			Set for IPv6 connections
+ * @already_sent:	Number of bytes already sent
+ * @fillsize:		Number of bytes we can receive
+ * @iov_cnt:		Number of iov entries used to store the data (output)
+ *
+ * Return: Number of bytes received by recvmsg(), negative on error, with
+ *         errno set
+ */
+static ssize_t tcp_vu_sock_recv(const struct ctx *c,
+				const struct tcp_tap_conn *conn, bool v6,
+				uint32_t already_sent, size_t fillsize,
+				int *iov_cnt)
+{
+	struct vu_dev *vdev = c->vdev;
+	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	struct msghdr mh_sock = { 0 };
+	uint16_t mss = MSS_GET(conn);
+	int s = conn->sock;
+	size_t hdrlen;
+	int elem_cnt;
+	ssize_t ret;
 
-		tlen = ip_len + sizeof(struct ethhdr);
+	*iov_cnt = 0;
 
-		if (*c->pcap) {
-			uint32_t sum = proto_ipv4_header_psum(iph->tot_len,
-							      IPPROTO_TCP,
-				(struct in_addr){ .s_addr = iph->saddr },
-				(struct in_addr){ .s_addr = iph->daddr });
+	hdrlen = tcp_vu_hdrlen(v6);
 
-			th->check = csum(th, optlen + sizeof(struct tcphdr), sum);
-		}
-	} else {
-		struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
-		struct tcphdr *th = (struct tcphdr *)(ip6h + 1);
-		char *data = (char *)(th + 1);
+	vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);
 
-		eh->h_proto = htons(ETH_P_IPV6);
+	elem_cnt = 0;
 
-		*th = (struct tcphdr){
-			.doff = sizeof(struct tcphdr) / 4,
-			.ack = 1
-		};
+	while (fillsize > 0 && elem_cnt < VIRTQUEUE_MAX_SIZE) {
+		struct iovec *iov;
+		size_t frame_size;
+		int cnt;
 
-		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
+		if (mss > fillsize)
+			mss = fillsize;
 
-		ret = tcp_fill_flag_header(c, conn, flags, th, data, &optlen);
-		if (ret <= 0) {
-			vu_queue_rewind(vdev, vq, 1);
-			return ret;
-		}
+		cnt = vu_collect(vdev, vq, &elem[elem_cnt],
+				 VIRTQUEUE_MAX_SIZE - elem_cnt,
+				 mss + hdrlen, &frame_size);
+		if (cnt == 0)
+			break;
 
-		ip_len = tcp_fill_headers6(c, conn, ip6h,
-					   (struct tcphdr *)(ip6h + 1),
-					   optlen, conn->seq_to_tap);
+		frame_size -= hdrlen;
+		iov = &elem[elem_cnt].in_sg[0];
+		iov->iov_base = (char *)iov->iov_base + hdrlen;
+		iov->iov_len -= hdrlen;
 
-		tlen = ip_len + sizeof(struct ethhdr);
+		fillsize -= frame_size;
+		elem_cnt += cnt;
 
-		if (*c->pcap) {
-			uint32_t sum = proto_ipv6_header_psum(ip6h->payload_len,
-							      IPPROTO_TCP,
-							      &ip6h->saddr,
-							      &ip6h->daddr);
+		/* All the frames must have the same size (except the last
+		 * one), otherwise we will not be able to scan the iov array
+		 * to find the entries that start with headers (headers are
+		 * spread every frame_size in the array).
+		 */
+		if (frame_size < mss)
+			break;
+	}
 
-			th->check = csum(th, optlen + sizeof(struct tcphdr), sum);
-		}
+	if (peek_offset_cap) {
+		mh_sock.msg_iov = iov_vu + 1;
+		mh_sock.msg_iovlen = elem_cnt;
+	} else {
+		iov_vu[0].iov_base = tcp_buf_discard;
+		iov_vu[0].iov_len = already_sent;
+
+		mh_sock.msg_iov = iov_vu;
+		mh_sock.msg_iovlen = elem_cnt + 1;
 	}
 
-	pcap((void *)eh, tlen);
+	do
+		ret = recvmsg(s, &mh_sock, MSG_PEEK);
+	while (ret < 0 && errno == EINTR);
 
-	tlen += vnet_hdrlen;
-	vu_queue_fill(vdev, vq, elem, tlen, 0);
-	nb_ack = 1;
+	*iov_cnt = elem_cnt;
 
-	if (flags & DUP_ACK) {
-		elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
-		if (elem) {
-			if (elem->in_num < 1 || elem->in_sg[0].iov_len < tlen) {
-				vu_queue_rewind(vdev, vq, 1);
-			} else {
-				memcpy(elem->in_sg[0].iov_base, vh, tlen);
-				nb_ack++;
-			}
-		}
+	return ret;
+}
+
+/**
+ * tcp_vu_prepare() - Prepare the frame header
+ * @c:		Execution context
+ * @conn:	Connection pointer
+ * @first:	Pointer to the array of IO vectors
+ * @dlen:	Packet data length
+ * @check:	Checksum, if already known
+ */
+static void tcp_vu_prepare(const struct ctx *c,
+			   struct tcp_tap_conn *conn, struct iovec *first,
+			   size_t dlen, const uint16_t **check)
+{
+	const struct flowside *toside = TAPFLOW(conn);
+	struct tcp_payload_t *payload;
+	char *base = first->iov_base;
+	struct ipv6hdr *ip6h = NULL;
+	struct iphdr *iph = NULL;
+	struct ethhdr *eh;
+
+	/* We assume the first iovec provided by the guest is large enough
+	 * to embed all the headers needed by the L2 frame.
+	 */
+
+	eh = vu_eth(base);
+
+	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
+	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
+
+	/* initialize header */
+
+	if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
+		ASSERT(first[0].iov_len >= tcp_vu_hdrlen(false));
+
+		eh->h_proto = htons(ETH_P_IP);
+
+		iph = vu_ip(base);
+		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
+		payload = vu_payloadv4(base);
+	} else {
+		ASSERT(first[0].iov_len >= tcp_vu_hdrlen(true));
+
+		eh->h_proto = htons(ETH_P_IPV6);
+
+		ip6h = vu_ip(base);
+		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
+
+		payload = vu_payloadv6(base);
 	}
 
-	vu_queue_flush(vdev, vq, nb_ack);
-	vu_queue_notify(vdev, vq);
+	memset(&payload->th, 0, sizeof(payload->th));
+	payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
+	payload->th.ack = 1;
 
-	return 0;
+	if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
+		tcp_fill_headers4(conn, NULL, iph, payload, dlen,
+				  *check, conn->seq_to_tap, true);
+		*check = &iph->check;
+	} else {
+		tcp_fill_headers6(conn, NULL, ip6h, payload, dlen,
+				  conn->seq_to_tap, true);
+	}
 }
 
-int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+/**
+ * tcp_vu_data_from_sock() - Handle new data from socket, queue to vhost-user,
+ *			     in window
+ * @c:		Execution context
+ * @conn:	Connection pointer
+ *
+ * Return: Negative on connection reset, 0 otherwise
+ */
+int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
-	uint32_t already_sent;
-	VuDev *vdev = (VuDev *)&c->vdev;
-	VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
-	int s = conn->sock, v4 = CONN_V4(conn);
-	int i, ret = 0, iov_count, iov_used;
-	struct msghdr mh_sock = { 0 };
-	size_t l2_hdrlen, vnet_hdrlen, fillsize;
-	ssize_t len;
-	uint16_t *check;
+	struct vu_dev *vdev = c->vdev;
+	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	const struct flowside *tapside = TAPFLOW(conn);
 	uint16_t mss = MSS_GET(conn);
-	int num_buffers;
-	int segment_size;
+	size_t hdrlen, fillsize;
+	int i, iov_cnt, iov_used;
+	int v6 = CONN_V6(conn);
+	uint32_t already_sent = 0;
+	const uint16_t *check;
 	struct iovec *first;
-	bool has_mrg_rxbuf;
+	int frame_size;
+	int num_buffers;
+	ssize_t len;
 
 	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
-		err("Got packet, but no available descriptors on RX virtq.");
+		flow_err(conn,
+			 "Got packet, but RX virtqueue not usable yet");
 		return 0;
 	}
 
@@ -205,6 +373,10 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 			   conn->seq_ack_from_tap, conn->seq_to_tap);
 		conn->seq_to_tap = conn->seq_ack_from_tap;
 		already_sent = 0;
+		if (tcp_set_peek_offset(conn->sock, 0)) {
+			tcp_rst(c, conn);
+			return -1;
+		}
 	}
 
 	if (!wnd_scaled || already_sent >= wnd_scaled) {
@@ -215,85 +387,26 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 	/* Set up buffer descriptors we'll fill completely and partially.
 	 */
-	fillsize = wnd_scaled;
-
-	iov_vu[0].iov_base = tcp_buf_discard;
-	iov_vu[0].iov_len = already_sent;
-	fillsize -= already_sent;
-
-	has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF);
-	if (has_mrg_rxbuf) {
-		vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	} else {
-		vnet_hdrlen = sizeof(struct virtio_net_hdr);
-	}
-	l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct tcphdr);
-	if (v4) {
-		l2_hdrlen += sizeof(struct iphdr);
-	} else {
-		l2_hdrlen += sizeof(struct ipv6hdr);
-	}
-
-	iov_count = 0;
-	segment_size = 0;
-	while (fillsize > 0 && iov_count < VIRTQUEUE_MAX_SIZE - 1) {
-		VuVirtqElement *elem;
-
-		elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
-		if (!elem)
-			break;
-
-		if (elem->in_num < 1) {
-			err("virtio-net receive queue contains no in buffers");
-			goto err;
-		}
-
-		ASSERT(elem->in_num == 1);
-		ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen);
-
-		indexes[iov_count] = elem->index;
-
-		if (segment_size == 0) {
-			iov_vu[iov_count + 1].iov_base =
-				(char *)elem->in_sg[0].iov_base + l2_hdrlen;
-			iov_vu[iov_count + 1].iov_len =
-				elem->in_sg[0].iov_len - l2_hdrlen;
-		} else {
-			iov_vu[iov_count + 1].iov_base = elem->in_sg[0].iov_base;
-			iov_vu[iov_count + 1].iov_len = elem->in_sg[0].iov_len;
+	fillsize = wnd_scaled - already_sent;
+
+	/* collect the buffers from vhost-user and fill them with the
+	 * data from the socket
+	 */
+	len = tcp_vu_sock_recv(c, conn, v6, already_sent, fillsize, &iov_cnt);
+	if (len < 0) {
+		vu_queue_rewind(vq, iov_cnt);
+		if (errno != EAGAIN && errno != EWOULDBLOCK) {
+			tcp_rst(c, conn);
+			return -errno;
 		}
-
-		if (iov_vu[iov_count + 1].iov_len > fillsize)
-			iov_vu[iov_count + 1].iov_len = fillsize;
-
-		segment_size += iov_vu[iov_count + 1].iov_len;
-		if (!has_mrg_rxbuf) {
-			segment_size = 0;
-		} else if (segment_size >= mss) {
-			iov_vu[iov_count + 1].iov_len -= segment_size - mss;
-			segment_size = 0;
-		}
-		fillsize -= iov_vu[iov_count + 1].iov_len;
-
-		iov_count++;
-	}
-	if (iov_count == 0)
 		return 0;
-
-	mh_sock.msg_iov = iov_vu;
-	mh_sock.msg_iovlen = iov_count + 1;
-
-	do
-		len = recvmsg(s, &mh_sock, MSG_PEEK);
-	while (len < 0 && errno == EINTR);
-
-	if (len < 0)
-		goto err;
+	}
 
 	if (!len) {
-		vu_queue_rewind(vdev, vq, iov_count);
+		vu_queue_rewind(vq, iov_cnt);
 		if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) {
-			if ((ret = tcp_vu_send_flag(c, conn, FIN | ACK))) {
+			int ret = tcp_vu_send_flag(c, conn, FIN | ACK);
+
+			if (ret) {
 				tcp_rst(c, conn);
 				return ret;
 			}
@@ -304,26 +417,36 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		return 0;
 	}
 
-	len -= already_sent;
+	if (!peek_offset_cap)
+		len -= already_sent;
+
 	if (len <= 0) {
+		vu_queue_rewind(vq, iov_cnt);
 		conn_flag(c, conn, STALLED);
-		vu_queue_rewind(vdev, vq, iov_count);
 		return 0;
 	}
 
 	conn_flag(c, conn, ~STALLED);
 
 	/* Likely, some new data was acked too.
 	 */
-	tcp_update_seqack_wnd(c, conn, 0, NULL);
+	tcp_update_seqack_wnd(c, conn, false, NULL);
 
 	/* initialize headers */
+	hdrlen = tcp_vu_hdrlen(v6);
 	iov_used = 0;
 	num_buffers = 0;
 	check = NULL;
-	segment_size = 0;
-	for (i = 0; i < iov_count && len; i++) {
+	frame_size = 0;
 
-		if (segment_size == 0)
+	/* iov_vu is an array of buffers, and a single buffer can be
+	 * smaller than the frame size we want to use. With num_buffers,
+	 * we can merge several virtio iov buffers into one frame: we only
+	 * need to set the frame headers in the first iov, and set
+	 * num_buffers to the number of iov entries the frame spans.
+	 */
+	for (i = 0; i < iov_cnt && len; i++) {
+
+		if (frame_size == 0)
 			first = &iov_vu[i + 1];
 
 		if (iov_vu[i + 1].iov_len > (size_t)len)
@@ -332,129 +455,40 @@ int tcp_vu_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		len -= iov_vu[i + 1].iov_len;
 		iov_used++;
 
-		segment_size += iov_vu[i + 1].iov_len;
+		frame_size += iov_vu[i + 1].iov_len;
 		num_buffers++;
 
-		if (segment_size >= mss || len == 0 ||
-		    i + 1 == iov_count || !has_mrg_rxbuf) {
-
-			struct ethhdr *eh;
-			struct virtio_net_hdr_mrg_rxbuf *vh;
-			char *base = (char *)first->iov_base - l2_hdrlen;
-			size_t size = first->iov_len + l2_hdrlen;
-
-			vh = (struct virtio_net_hdr_mrg_rxbuf *)base;
-
-			vh->hdr = vu_header;
-			if (has_mrg_rxbuf)
-				vh->num_buffers = htole16(num_buffers);
-
-			eh = (struct ethhdr *)((char *)base + vnet_hdrlen);
-
-			memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
-			memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
-
-			/* initialize header */
-			if (v4) {
-				struct iphdr *iph = (struct iphdr *)(eh + 1);
-				struct tcphdr *th = (struct tcphdr *)(iph + 1);
-
-				eh->h_proto = htons(ETH_P_IP);
-
-				*th = (struct tcphdr){
-					.doff = sizeof(struct tcphdr) / 4,
-					.ack = 1
-				};
-
-				*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
-
-				tcp_fill_headers4(c, conn, iph,
-						  (struct tcphdr *)(iph + 1),
-						  segment_size,
-						  len ? check : NULL,
-						  conn->seq_to_tap);
-
-				if (*c->pcap) {
-					uint32_t sum = proto_ipv4_header_psum(iph->tot_len,
-									      IPPROTO_TCP,
-						(struct in_addr){ .s_addr = iph->saddr },
-						(struct in_addr){ .s_addr = iph->daddr });
-
-					first->iov_base = th;
-					first->iov_len = size - l2_hdrlen + sizeof(*th);
-
-					th->check = csum_iov(first, num_buffers, sum);
-				}
-
-				check = &iph->check;
-			} else {
-				struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
-				struct tcphdr *th = (struct tcphdr *)(ip6h + 1);
-
-				eh->h_proto = htons(ETH_P_IPV6);
-
-				*th = (struct tcphdr){
-					.doff = sizeof(struct tcphdr) / 4,
-					.ack = 1
-				};
-
-				*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
-
-				tcp_fill_headers6(c, conn, ip6h,
-						  (struct tcphdr *)(ip6h + 1),
-						  segment_size,
-						  conn->seq_to_tap);
-				if (*c->pcap) {
-					uint32_t sum = proto_ipv6_header_psum(ip6h->payload_len,
-									      IPPROTO_TCP,
-									      &ip6h->saddr,
-									      &ip6h->daddr);
-
-					first->iov_base = th;
-					first->iov_len = size - l2_hdrlen + sizeof(*th);
-
-					th->check = csum_iov(first, num_buffers, sum);
-				}
+		if (frame_size >= mss || len == 0 ||
+		    i + 1 == iov_cnt ||
+		    !vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
+			if (i + 1 == iov_cnt)
+				check = NULL;
+
+			/* restore first iovec base: point to vnet header */
+			first->iov_base = (char *)first->iov_base - hdrlen;
+			first->iov_len += hdrlen;
+			vu_set_vnethdr(vdev, first->iov_base, num_buffers);
+
+			tcp_vu_prepare(c, conn, first, frame_size, &check);
+			if (*c->pcap) {
+				tcp_vu_update_check(tapside, first,
+						    num_buffers);
+				pcap_iov(first, num_buffers,
					 sizeof(struct virtio_net_hdr_mrg_rxbuf));
 			}
 
-			/* set iov for pcap logging */
-			first->iov_base = eh;
-			first->iov_len = size - vnet_hdrlen;
-
-			pcap_iov(first, num_buffers);
+			conn->seq_to_tap += frame_size;
 
-			/* set iov_len for vu_queue_fill_by_index(); */
-			first->iov_base = base;
-			first->iov_len = size;
-
-			conn->seq_to_tap += segment_size;
-
-			segment_size = 0;
+			frame_size = 0;
 			num_buffers = 0;
 		}
 	}
 
 	/* release unused buffers */
-	vu_queue_rewind(vdev, vq, iov_count - iov_used);
+	vu_queue_rewind(vq, iov_cnt - iov_used);
 
 	/* send packets */
-	for (i = 0; i < iov_used; i++) {
-		vu_queue_fill_by_index(vdev, vq, indexes[i],
-				       iov_vu[i + 1].iov_len, i);
-	}
-
-	vu_queue_flush(vdev, vq, iov_used);
-	vu_queue_notify(vdev, vq);
+	vu_flush(vdev, vq, elem, iov_used);
 
 	conn_flag(c, conn, ACK_FROM_TAP_DUE);
 
 	return 0;
-
-err:
-	vu_queue_rewind(vdev, vq, iov_count);
-
-	if (errno != EAGAIN && errno != EWOULDBLOCK) {
-		ret = -errno;
-		tcp_rst(c, conn);
-	}
-
-	return ret;
 }
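A note on the zero-copy pattern this patch introduces: tcp_vu_sock_recv() offsets each frame's first iovec by hdrlen before calling recvmsg(), so the payload is peeked from the socket directly into guest buffers, and the caller later steps the iovec back so tcp_vu_prepare() can write the vnet/Ethernet/IP/TCP headers in front of data already in place. Below is a minimal standalone sketch of that idea under stated assumptions; HDRLEN and peek_frame() are hypothetical stand-ins, not passt or vhost-user APIs.

/* Sketch: reserve header room at the head of a buffer, peek the socket
 * payload into the space after it, then rewind the iovec so one frame
 * covers headers + payload.  Illustrative only.
 */
#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define HDRLEN 66	/* e.g. vnet header + Ethernet + IPv4 + TCP */

static ssize_t peek_frame(int s, char *buf, size_t size, struct iovec *iov)
{
	struct msghdr mh = { 0 };
	ssize_t n;

	if (size <= HDRLEN) {
		errno = EINVAL;
		return -1;
	}

	/* step 1: leave room for the headers we don't know yet */
	iov->iov_base = buf + HDRLEN;
	iov->iov_len = size - HDRLEN;

	mh.msg_iov = iov;
	mh.msg_iovlen = 1;

	/* step 2: MSG_PEEK copies the payload into place without
	 * consuming it, so it can be sent again on retransmission,
	 * as in tcp_vu_sock_recv()
	 */
	do
		n = recvmsg(s, &mh, MSG_PEEK);
	while (n < 0 && errno == EINTR);
	if (n <= 0)
		return n;

	/* step 3: rewind the iovec to cover the header room, as
	 * tcp_vu_data_from_sock() does before calling tcp_vu_prepare()
	 */
	iov->iov_base = buf;
	iov->iov_len = HDRLEN + n;
	memset(buf, 0, HDRLEN);	/* real code fills vnet/Eth/IP/TCP here */

	return n;
}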