about git code bugs lists chat
path: root/tcp_buf.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcp_buf.c')
-rw-r--r-- tcp_buf.c 462
1 files changed, 190 insertions, 272 deletions
diff --git a/tcp_buf.c b/tcp_buf.c
index 4b428f8..0946cd5 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -6,9 +6,9 @@
* PASTA - Pack A Subtle Tap Abstraction
* for network namespace/tap device mode
*
- * tcp_buf.c - TCP L2-L4 translation state machine
+ * tcp_buf.c - TCP L2 buffer management functions
*
- * Copyright (c) 2020-2022 Red Hat GmbH
+ * Copyright Red Hat
* Author: Stefano Brivio <sbrivio@redhat.com>
*/
@@ -20,10 +20,11 @@
#include <netinet/ip.h>
-#include <linux/tcp.h>
+#include <netinet/tcp.h>
#include "util.h"
#include "ip.h"
+#include "iov.h"
#include "passt.h"
#include "tap.h"
#include "siphash.h"
@@ -33,283 +34,201 @@
#include "tcp_buf.h"
#define TCP_FRAMES_MEM 128
-#define TCP_FRAMES \
+#define TCP_FRAMES \
(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
-/**
- * tcp_buf_seq_update - Sequences to update with length of frames once sent
- * @seq: Pointer to sequence number sent to tap-side, to be updated
- * @len: TCP payload length
- */
-struct tcp_buf_seq_update {
- uint32_t *seq;
- uint16_t len;
-};
-
/* Static buffers */
-/**
- * tcp_l2_flags_t - TCP header and data to send option flags
- * @th: TCP header
- * @opts TCP option flags
- */
-struct tcp_l2_flags_t {
- struct tcphdr th;
- char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-};
-/**
- * tcp_l2_payload_t - TCP header and data to send data
- * 32 bytes aligned to be able to use AVX2 checksum
- * @th: TCP header
- * @data: TCP data
- */
-struct tcp_l2_payload_t {
- struct tcphdr th; /* 20 bytes */
- uint8_t data[MSS]; /* 65516 bytes */
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
-/* Ethernet header for IPv4 frames */
-static struct ethhdr tcp4_eth_src;
-
-/* IPv4 headers */
-static struct iphdr tcp4_l2_ip[TCP_FRAMES_MEM];
-/* TCP headers and data for IPv4 frames */
-static struct tcp_l2_payload_t tcp4_l2_payload[TCP_FRAMES_MEM];
-
-static struct tcp_buf_seq_update tcp4_l2_buf_seq_update[TCP_FRAMES_MEM];
-static unsigned int tcp4_l2_buf_used;
-
-/* IPv4 headers for TCP option flags frames */
-static struct iphdr tcp4_l2_flags_ip[TCP_FRAMES_MEM];
-/* TCP headers and option flags for IPv4 frames */
-static struct tcp_l2_flags_t tcp4_l2_flags[TCP_FRAMES_MEM];
-static unsigned int tcp4_l2_flags_buf_used;
-
-/* Ethernet header for IPv6 frames */
+/* Ethernet header for IPv4 and IPv6 frames */
+static struct ethhdr tcp4_eth_src;
static struct ethhdr tcp6_eth_src;
-/* IPv6 headers */
-static struct ipv6hdr tcp6_l2_ip[TCP_FRAMES_MEM];
-/* TCP headers and data for IPv6 frames */
-static struct tcp_l2_payload_t tcp6_l2_payload[TCP_FRAMES_MEM];
+static struct tap_hdr tcp_payload_tap_hdr[TCP_FRAMES_MEM];
+
+/* IP headers for IPv4 and IPv6 */
+struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM];
+struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM];
-static struct tcp_buf_seq_update tcp6_l2_buf_seq_update[TCP_FRAMES_MEM];
-static unsigned int tcp6_l2_buf_used;
+/* TCP segments with payload for IPv4 and IPv6 frames */
+static struct tcp_payload_t tcp_payload[TCP_FRAMES_MEM];
-/* IPv6 headers for TCP option flags frames */
-static struct ipv6hdr tcp6_l2_flags_ip[TCP_FRAMES_MEM];
-/* TCP headers and option flags for IPv6 frames */
-static struct tcp_l2_flags_t tcp6_l2_flags[TCP_FRAMES_MEM];
+static_assert(MSS4 <= sizeof(tcp_payload[0].data), "MSS4 is greater than 65516");
+static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516");
-static unsigned int tcp6_l2_flags_buf_used;
+/* References tracking the owner connection of frames in the tap outqueue */
+static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
+static unsigned int tcp_payload_used;
/* recvmsg()/sendmsg() data for tap */
static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
-static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_IOV_NUM];
-static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_IOV_NUM];
-static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_IOV_NUM];
-static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_IOV_NUM];
+static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
/**
- * tcp_buf_update_l2() - Update L2 buffers with Ethernet and IPv4 addresses
+ * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
* @eth_d: Ethernet destination address, NULL if unchanged
* @eth_s: Ethernet source address, NULL if unchanged
*/
-void tcp_buf_update_l2(const unsigned char *eth_d, const unsigned char *eth_s)
+void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
{
eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
}
/**
- * tcp_buf_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
+ * tcp_sock_iov_init() - Initialise scatter-gather L2 buffers for IPv4 and IPv6
* @c: Execution context
*/
-void tcp_buf_sock4_iov_init(const struct ctx *c)
+void tcp_sock_iov_init(const struct ctx *c)
{
+ struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
int i;
- (void)c;
-
+ tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+
+ for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
+ tcp6_payload_ip[i] = ip6;
+ tcp4_payload_ip[i] = iph;
+ }
+
for (i = 0; i < TCP_FRAMES_MEM; i++) {
- struct iovec *iov;
-
- /* headers */
- tcp4_l2_ip[i] = iph;
- tcp4_l2_payload[i].th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
-
- tcp4_l2_flags_ip[i] = iph;
- tcp4_l2_flags[i].th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
-
- /* iovecs */
- iov = tcp4_l2_iov[i];
- iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
- iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
- iov[TCP_IOV_IP].iov_base = &tcp4_l2_ip[i];
- iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
- iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_l2_payload[i];
+ struct iovec *iov = tcp_l2_iov[i];
- iov = tcp4_l2_flags_iov[i];
- iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
+ iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]);
iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
- iov[TCP_IOV_IP].iov_base = &tcp4_l2_flags_ip[i];
- iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
- iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_l2_flags[i];
+ iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
}
}
/**
- * tcp_buf_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets
- * @c: Execution context
+ * tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission
+ * @c: Execution context
+ * @conns: Array of connection pointers corresponding to queued frames
+ * @frames: Two-dimensional array containing queued frames with sub-iovs
+ * @num_frames: Number of entries in the two arrays to be compared
*/
-void tcp_buf_sock6_iov_init(const struct ctx *c)
+static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
+ struct iovec (*frames)[TCP_NUM_IOVS], int num_frames)
{
- struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
int i;
- (void)c;
+ for (i = 0; i < num_frames; i++) {
+ const struct tcphdr *th = frames[i][TCP_IOV_PAYLOAD].iov_base;
+ struct tcp_tap_conn *conn = conns[i];
+ uint32_t seq = ntohl(th->seq);
+ uint32_t peek_offset;
- tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
- for (i = 0; i < TCP_FRAMES_MEM; i++) {
- struct iovec *iov;
-
- /* headers */
- tcp6_l2_ip[i] = ip6;
- tcp6_l2_payload[i].th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
-
- tcp6_l2_flags_ip[i] = ip6;
- tcp6_l2_flags[i].th = (struct tcphdr){
- .doff = sizeof(struct tcphdr) / 4,
- .ack = 1
- };
-
- /* iovecs */
- iov = tcp6_l2_iov[i];
- iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
- iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
- iov[TCP_IOV_IP].iov_base = &tcp6_l2_ip[i];
- iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
- iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_l2_payload[i];
+ if (SEQ_LE(conn->seq_to_tap, seq))
+ continue;
- iov = tcp6_l2_flags_iov[i];
- iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
- iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
- iov[TCP_IOV_IP].iov_base = &tcp6_l2_flags_ip[i];
- iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
- iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_l2_flags[i];
+ conn->seq_to_tap = seq;
+ peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
+ if (tcp_set_peek_offset(conn->sock, peek_offset))
+ tcp_rst(c, conn);
}
}
/**
- * tcp_buf_l2_flags_flush() - Send out buffers for segments with no data (flags)
+ * tcp_payload_flush() - Send out buffers for segments with data or flags
* @c: Execution context
*/
-void tcp_buf_l2_flags_flush(const struct ctx *c)
+void tcp_payload_flush(const struct ctx *c)
{
- tap_send_iov(c, tcp6_l2_flags_iov, tcp6_l2_flags_buf_used);
- tcp6_l2_flags_buf_used = 0;
+ size_t m;
- tap_send_iov(c, tcp4_l2_flags_iov, tcp4_l2_flags_buf_used);
- tcp4_l2_flags_buf_used = 0;
+ m = tap_send_frames(c, &tcp_l2_iov[0][0], TCP_NUM_IOVS,
+ tcp_payload_used);
+ if (m != tcp_payload_used) {
+ tcp_revert_seq(c, &tcp_frame_conns[m], &tcp_l2_iov[m],
+ tcp_payload_used - m);
+ }
+ tcp_payload_used = 0;
}
/**
- * tcp_buf_l2_data_flush() - Send out buffers for segments with data
- * @c: Execution context
+ * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
+ * @conn: Connection pointer
+ * @iov: Pointer to an array of iovec of TCP pre-cooked buffers
+ * @dlen: TCP payload length
+ * @check: Checksum, if already known
+ * @seq: Sequence number for this segment
+ * @no_tcp_csum: Do not set TCP checksum
*/
-void tcp_buf_l2_data_flush(const struct ctx *c)
+static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
+ struct iovec *iov, size_t dlen,
+ const uint16_t *check, uint32_t seq,
+ bool no_tcp_csum)
{
- unsigned i;
- size_t m;
-
- m = tap_send_iov(c, tcp6_l2_iov, tcp6_l2_buf_used);
- for (i = 0; i < m; i++)
- *tcp6_l2_buf_seq_update[i].seq += tcp6_l2_buf_seq_update[i].len;
- tcp6_l2_buf_used = 0;
-
- m = tap_send_iov(c, tcp4_l2_iov, tcp4_l2_buf_used);
- for (i = 0; i < m; i++)
- *tcp4_l2_buf_seq_update[i].seq += tcp4_l2_buf_seq_update[i].len;
- tcp4_l2_buf_used = 0;
+ const struct flowside *tapside = TAPFLOW(conn);
+ const struct in_addr *a4 = inany_v4(&tapside->oaddr);
+
+ if (a4) {
+ tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
+ iov[TCP_IOV_IP].iov_base,
+ iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+ check, seq, no_tcp_csum);
+ } else {
+ tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
+ iov[TCP_IOV_IP].iov_base,
+ iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+ seq, no_tcp_csum);
+ }
}
-int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+/**
+ * tcp_buf_send_flag() - Send segment with flags to tap (no payload)
+ * @c: Execution context
+ * @conn: Connection pointer
+ * @flags: TCP flags: if not set, send segment only if ACK is due
+ *
+ * Return: negative error code on connection reset, 0 otherwise
+ */
+int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
- struct tcp_l2_flags_t *payload;
- struct iovec *dup_iov;
+ struct tcp_payload_t *payload;
struct iovec *iov;
- struct tcphdr *th;
- size_t optlen = 0;
- size_t ip_len;
- char *data;
+ size_t optlen;
+ size_t l4len;
+ uint32_t seq;
int ret;
+ iov = tcp_l2_iov[tcp_payload_used];
if (CONN_V4(conn)) {
- iov = tcp4_l2_flags_iov[tcp4_l2_flags_buf_used++];
- dup_iov = tcp4_l2_flags_iov[tcp4_l2_flags_buf_used];
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
+ iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
} else {
- iov = tcp6_l2_flags_iov[tcp6_l2_flags_buf_used++];
- dup_iov = tcp6_l2_flags_iov[tcp6_l2_flags_buf_used];
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
+ iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
}
- payload = iov[TCP_IOV_PAYLOAD].iov_base;
- th = &payload->th;
- data = payload->opts;
- ret = tcp_fill_flag_header(c, conn, flags, th, data, &optlen);
+ payload = iov[TCP_IOV_PAYLOAD].iov_base;
+ seq = conn->seq_to_tap;
+ ret = tcp_prepare_flags(c, conn, flags, &payload->th,
+ (struct tcp_syn_opts *)&payload->data, &optlen);
if (ret <= 0)
return ret;
- if (CONN_V4(conn)) {
- struct iphdr *iph = iov[TCP_IOV_IP].iov_base;
-
- ip_len = tcp_fill_headers4(c, conn, iph, th, optlen, NULL,
- conn->seq_to_tap);
- } else {
- struct ipv6hdr *ip6h = iov[TCP_IOV_IP].iov_base;
-
- ip_len = tcp_fill_headers6(c, conn, ip6h, th, optlen,
- conn->seq_to_tap);
- }
- iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
+ tcp_payload_used++;
+ l4len = optlen + sizeof(struct tcphdr);
+ iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+ tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
if (flags & DUP_ACK) {
- int i;
- for (i = 0; i < TCP_IOV_NUM; i++) {
- memcpy(dup_iov[i].iov_base, iov[i].iov_base,
- iov[i].iov_len);
- dup_iov[i].iov_len = iov[i].iov_len;
- }
+ struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
+
+ memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
+ iov[TCP_IOV_TAP].iov_len);
+ dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
+ dup_iov[TCP_IOV_IP] = iov[TCP_IOV_IP];
+ memcpy(dup_iov[TCP_IOV_PAYLOAD].iov_base,
+ iov[TCP_IOV_PAYLOAD].iov_base, l4len);
+ dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
}
- if (CONN_V4(conn)) {
- if (flags & DUP_ACK)
- tcp4_l2_flags_buf_used++;
-
- if (tcp4_l2_flags_buf_used > TCP_FRAMES_MEM - 2)
- tcp_buf_l2_flags_flush(c);
- } else {
- if (flags & DUP_ACK)
- tcp6_l2_flags_buf_used++;
-
- if (tcp6_l2_flags_buf_used > TCP_FRAMES_MEM - 2)
- tcp_buf_l2_flags_flush(c);
- }
+ if (tcp_payload_used > TCP_FRAMES_MEM - 2)
+ tcp_payload_flush(c);
return 0;
}
@@ -318,49 +237,42 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
* tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer
* @c: Execution context
* @conn: Connection pointer
- * @plen: Payload length at L4
+ * @dlen: TCP payload length
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
* @seq: Sequence number to be sent
*/
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
- ssize_t plen, int no_csum, uint32_t seq)
+ ssize_t dlen, int no_csum, uint32_t seq)
{
- uint32_t *seq_update = &conn->seq_to_tap;
+ struct tcp_payload_t *payload;
+ const uint16_t *check = NULL;
struct iovec *iov;
+ conn->seq_to_tap = seq + dlen;
+ tcp_frame_conns[tcp_payload_used] = conn;
+ iov = tcp_l2_iov[tcp_payload_used];
if (CONN_V4(conn)) {
- struct iovec *iov_prev = tcp4_l2_iov[tcp4_l2_buf_used - 1];
- const uint16_t *check = NULL;
-
if (no_csum) {
+ struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1];
struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
+
check = &iph->check;
}
-
- tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update;
- tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen;
-
- iov = tcp4_l2_iov[tcp4_l2_buf_used++];
- iov[TCP_IOV_PAYLOAD].iov_len = tcp_fill_headers4(c, conn,
- iov[TCP_IOV_IP].iov_base,
- iov[TCP_IOV_PAYLOAD].iov_base,
- plen, check, seq);
-
- if (tcp4_l2_buf_used > TCP_FRAMES_MEM - 1)
- tcp_buf_l2_data_flush(c);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
+ iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
} else if (CONN_V6(conn)) {
- tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update;
- tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen;
-
- iov = tcp6_l2_iov[tcp6_l2_buf_used++];
- iov[TCP_IOV_PAYLOAD].iov_len = tcp_fill_headers6(c, conn,
- iov[TCP_IOV_IP].iov_base,
- iov[TCP_IOV_PAYLOAD].iov_base,
- plen, seq);
-
- if (tcp6_l2_buf_used > TCP_FRAMES_MEM - 1)
- tcp_buf_l2_data_flush(c);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
+ iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
}
+ payload = iov[TCP_IOV_PAYLOAD].iov_base;
+ payload->th.th_off = sizeof(struct tcphdr) / 4;
+ payload->th.th_x2 = 0;
+ payload->th.th_flags = 0;
+ payload->th.ack = 1;
+ iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
+ tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
+ if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
+ tcp_payload_flush(c);
}
/**
@@ -372,17 +284,17 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
*
* #syscalls recvmsg
*/
-int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
- int sendlen, len, plen, v4 = CONN_V4(conn);
- int s = conn->sock, i, ret = 0;
+ int len, dlen, i, s = conn->sock;
struct msghdr mh_sock = { 0 };
uint16_t mss = MSS_GET(conn);
uint32_t already_sent, seq;
struct iovec *iov;
+ /* How much have we read/sent since the last received ACK? */
already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;
if (SEQ_LT(already_sent, 0)) {
@@ -391,6 +303,10 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
conn->seq_ack_from_tap, conn->seq_to_tap);
conn->seq_to_tap = conn->seq_ack_from_tap;
already_sent = 0;
+ if (tcp_set_peek_offset(s, 0)) {
+ tcp_rst(c, conn);
+ return -1;
+ }
}
if (!wnd_scaled || already_sent >= wnd_scaled) {
@@ -408,25 +324,26 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
iov_rem = (wnd_scaled - already_sent) % mss;
}
- mh_sock.msg_iov = iov_sock;
- mh_sock.msg_iovlen = fill_bufs + 1;
-
- iov_sock[0].iov_base = tcp_buf_discard;
- iov_sock[0].iov_len = already_sent;
+ /* Prepare iov according to kernel capability */
+ if (!peek_offset_cap) {
+ mh_sock.msg_iov = iov_sock;
+ iov_sock[0].iov_base = tcp_buf_discard;
+ iov_sock[0].iov_len = already_sent;
+ mh_sock.msg_iovlen = fill_bufs + 1;
+ } else {
+ mh_sock.msg_iov = &iov_sock[1];
+ mh_sock.msg_iovlen = fill_bufs;
+ }
- if (( v4 && tcp4_l2_buf_used + fill_bufs > TCP_FRAMES_MEM) ||
- (!v4 && tcp6_l2_buf_used + fill_bufs > TCP_FRAMES_MEM)) {
- tcp_buf_l2_data_flush(c);
+ if (tcp_payload_used + fill_bufs > TCP_FRAMES_MEM) {
+ tcp_payload_flush(c);
/* Silence Coverity CWE-125 false positive */
- tcp4_l2_buf_used = tcp6_l2_buf_used = 0;
+ tcp_payload_used = 0;
}
for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
- if (v4)
- iov->iov_base = &tcp4_l2_payload[tcp4_l2_buf_used + i].data;
- else
- iov->iov_base = &tcp6_l2_payload[tcp6_l2_buf_used + i].data;
+ iov->iov_base = &tcp_payload[tcp_payload_used + i].data;
iov->iov_len = mss;
}
if (iov_rem)
@@ -437,12 +354,19 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
len = recvmsg(s, &mh_sock, MSG_PEEK);
while (len < 0 && errno == EINTR);
- if (len < 0)
- goto err;
+ if (len < 0) {
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
+ tcp_rst(c, conn);
+ return -errno;
+ }
+
+ return 0;
+ }
if (!len) {
if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) {
- if ((ret = tcp_buf_send_flag(c, conn, FIN | ACK))) {
+ int ret = tcp_buf_send_flag(c, conn, FIN | ACK);
+ if (ret) {
tcp_rst(c, conn);
return ret;
}
@@ -453,42 +377,36 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
return 0;
}
- sendlen = len - already_sent;
- if (sendlen <= 0) {
+ if (!peek_offset_cap)
+ len -= already_sent;
+
+ if (len <= 0) {
conn_flag(c, conn, STALLED);
return 0;
}
conn_flag(c, conn, ~STALLED);
- send_bufs = DIV_ROUND_UP(sendlen, mss);
- last_len = sendlen - (send_bufs - 1) * mss;
+ send_bufs = DIV_ROUND_UP(len, mss);
+ last_len = len - (send_bufs - 1) * mss;
/* Likely, some new data was acked too. */
- tcp_update_seqack_wnd(c, conn, 0, NULL);
+ tcp_update_seqack_wnd(c, conn, false, NULL);
/* Finally, queue to tap */
- plen = mss;
+ dlen = mss;
seq = conn->seq_to_tap;
for (i = 0; i < send_bufs; i++) {
- int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used;
+ int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
if (i == send_bufs - 1)
- plen = last_len;
+ dlen = last_len;
- tcp_data_to_tap(c, conn, plen, no_csum, seq);
- seq += plen;
+ tcp_data_to_tap(c, conn, dlen, no_csum, seq);
+ seq += dlen;
}
conn_flag(c, conn, ACK_FROM_TAP_DUE);
return 0;
-
-err:
- if (errno != EAGAIN && errno != EWOULDBLOCK) {
- ret = -errno;
- tcp_rst(c, conn);
- }
-
- return ret;
}