aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r-- tcp.c 93
-rw-r--r-- tcp.h 13
-rw-r--r-- tcp_buf.c 10
-rw-r--r-- tcp_buf.h 6
-rw-r--r-- tcp_internal.h 4
5 files changed, 82 insertions, 44 deletions
diff --git a/tcp.c b/tcp.c
index 14b48a8..cba3f3b 100644
--- a/tcp.c
+++ b/tcp.c
@@ -308,11 +308,6 @@
/* MSS rounding: see SET_MSS() */
#define MSS_DEFAULT 536
#define WINDOW_DEFAULT 14600 /* RFC 6928 */
-#ifdef HAS_SND_WND
-# define KERNEL_REPORTS_SND_WND(c) ((c)->tcp.kernel_snd_wnd)
-#else
-# define KERNEL_REPORTS_SND_WND(c) (0 && (c))
-#endif
#define ACK_INTERVAL 10 /* ms */
#define SYN_TIMEOUT 10 /* s */
@@ -370,6 +365,14 @@ char tcp_buf_discard [MAX_WINDOW];
/* Does the kernel support TCP_PEEK_OFF? */
bool peek_offset_cap;
+#ifdef HAS_SND_WND
+/* Does the kernel report the sending window in TCP_INFO (kernel commit
+ * 8f7baad7f035)?
+ */
+bool snd_wnd_cap;
+#else
+#define snd_wnd_cap (false)
+#endif
/* sendmsg() to socket */
static struct iovec tcp_iov [UIO_MAXIOV];
@@ -1052,7 +1055,7 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
}
#endif /* !HAS_BYTES_ACKED */
- if (!KERNEL_REPORTS_SND_WND(c)) {
+ if (!snd_wnd_cap) {
tcp_get_sndbuf(conn);
new_wnd_to_tap = MIN(SNDBUF_GET(conn), MAX_WINDOW);
conn->wnd_to_tap = MIN(new_wnd_to_tap >> conn->ws_to_tap,
@@ -1136,7 +1139,7 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
* 0 if there is no flag to send
* 1 otherwise
*/
-int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
+int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
int flags, struct tcphdr *th, char *data,
size_t *optlen)
{
@@ -1153,11 +1156,6 @@ int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
return -ECONNRESET;
}
-#ifdef HAS_SND_WND
- if (!c->tcp.kernel_snd_wnd && tinfo.tcpi_snd_wnd)
- c->tcp.kernel_snd_wnd = 1;
-#endif
-
if (!(conn->flags & LOCAL))
tcp_rtt_dst_check(conn, &tinfo);
@@ -1235,7 +1233,8 @@ int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
*
* Return: negative error code on connection reset, 0 otherwise
*/
-static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn,
+ int flags)
{
return tcp_buf_send_flag(c, conn, flags);
}
@@ -1245,7 +1244,7 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
* @c: Execution context
* @conn: Connection pointer
*/
-void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn)
+void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn)
{
if (conn->events == CLOSED)
return;
@@ -1463,7 +1462,7 @@ static void tcp_bind_outbound(const struct ctx *c,
* @optlen: Bytes in options: caller MUST ensure available length
* @now: Current timestamp
*/
-static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
+static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
const void *saddr, const void *daddr,
const struct tcphdr *th, const char *opts,
size_t optlen, const struct timespec *now)
@@ -1628,7 +1627,7 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
*
* #syscalls recvmsg
*/
-static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{
return tcp_buf_data_from_sock(c, conn);
}
@@ -1644,8 +1643,8 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
*
* Return: count of consumed packets
*/
-static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
- const struct pool *p, int idx)
+static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
+ const struct pool *p, int idx)
{
int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
uint16_t max_ack_seq_wnd = conn->wnd_from_tap;
@@ -1842,7 +1841,8 @@ out:
* @opts: Pointer to start of options
* @optlen: Bytes in options: caller MUST ensure available length
*/
-static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
+static void tcp_conn_from_sock_finish(const struct ctx *c,
+ struct tcp_tap_conn *conn,
const struct tcphdr *th,
const char *opts, size_t optlen)
{
@@ -1885,7 +1885,7 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
*
* Return: count of consumed packets
*/
-int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
+int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
const struct pool *p, int idx, const struct timespec *now)
{
@@ -2023,7 +2023,7 @@ reset:
* @c: Execution context
* @conn: Connection pointer
*/
-static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
+static void tcp_connect_finish(const struct ctx *c, struct tcp_tap_conn *conn)
{
socklen_t sl;
int so;
@@ -2049,8 +2049,8 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
* @sa: Peer socket address (from accept())
* @now: Current timestamp
*/
-static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
- const struct timespec *now)
+static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
+ int s, const struct timespec *now)
{
struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
uint64_t hash;
@@ -2081,7 +2081,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
* @ref: epoll reference of listening socket
* @now: Current timestamp
*/
-void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
+void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
const struct timespec *now)
{
const struct flowside *ini;
@@ -2146,7 +2146,7 @@ cancel:
*
* #syscalls timerfd_gettime arm:timerfd_gettime64 i686:timerfd_gettime64
*/
-void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
+void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
{
struct itimerspec check_armed = { { 0 }, { 0 } };
struct tcp_tap_conn *conn = &FLOW(ref.flow)->tcp;
@@ -2210,7 +2210,8 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
* @ref: epoll reference
* @events: epoll events bitmap
*/
-void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
+void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events)
{
struct tcp_tap_conn *conn = conn_at_sidx(ref.flowside);
@@ -2494,6 +2495,40 @@ static bool tcp_probe_peek_offset_cap(sa_family_t af)
return ret;
}
+#ifdef HAS_SND_WND
+/**
+ * tcp_probe_snd_wnd_cap() - Check if TCP_INFO reports tcpi_snd_wnd
+ *
+ * Return: true if TCP_INFO data covers tcpi_snd_wnd, false if not or on failure
+ */
+static bool tcp_probe_snd_wnd_cap(void)
+{
+ struct tcp_info tinfo;
+ socklen_t sl = sizeof(tinfo); /* in: buffer size, out: length returned */
+ int s;
+
+ s = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
+ if (s < 0) {
+ warn_perror("Temporary TCP socket creation failed");
+ return false;
+ }
+
+ if (getsockopt(s, SOL_TCP, TCP_INFO, &tinfo, &sl)) {
+ warn_perror("Failed to get TCP_INFO on temporary socket");
+ close(s);
+ return false;
+ }
+
+ close(s); /* only the returned length is examined from here on */
+
+ if (sl < (offsetof(struct tcp_info, tcpi_snd_wnd) +
+ sizeof(tinfo.tcpi_snd_wnd)))
+ return false; /* returned data ends before tcpi_snd_wnd */
+
+ return true;
+}
+#endif /* HAS_SND_WND */
+
/**
* tcp_init() - Get initial sequence, hash secret, initialise per-socket data
* @c: Execution context
@@ -2527,6 +2562,12 @@ int tcp_init(struct ctx *c)
(!c->ifi6 || tcp_probe_peek_offset_cap(AF_INET6));
debug("SO_PEEK_OFF%ssupported", peek_offset_cap ? " " : " not ");
+#ifdef HAS_SND_WND
+ snd_wnd_cap = tcp_probe_snd_wnd_cap();
+#endif
+ debug("TCP_INFO tcpi_snd_wnd field%ssupported",
+ snd_wnd_cap ? " " : " not ");
+
return 0;
}
diff --git a/tcp.h b/tcp.h
index e9ff019..5585924 100644
--- a/tcp.h
+++ b/tcp.h
@@ -10,11 +10,12 @@
struct ctx;
-void tcp_timer_handler(struct ctx *c, union epoll_ref ref);
-void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
+void tcp_timer_handler(const struct ctx *c, union epoll_ref ref);
+void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
const struct timespec *now);
-void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
-int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
+void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events);
+int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
const struct pool *p, int idx, const struct timespec *now);
int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
@@ -58,16 +59,12 @@ union tcp_listen_epoll_ref {
* @fwd_in: Port forwarding configuration for inbound packets
* @fwd_out: Port forwarding configuration for outbound packets
* @timer_run: Timestamp of most recent timer run
- * @kernel_snd_wnd: Kernel reports sending window (with commit 8f7baad7f035)
* @pipe_size: Size of pipes for spliced connections
*/
struct tcp_ctx {
struct fwd_ports fwd_in;
struct fwd_ports fwd_out;
struct timespec timer_run;
-#ifdef HAS_SND_WND
- int kernel_snd_wnd;
-#endif
size_t pipe_size;
};
diff --git a/tcp_buf.c b/tcp_buf.c
index 1a39846..c886c92 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -239,7 +239,7 @@ void tcp_flags_flush(const struct ctx *c)
* @frames: Two-dimensional array containing queued frames with sub-iovs
* @num_frames: Number of entries in the two arrays to be compared
*/
-static void tcp_revert_seq(struct ctx *c, struct tcp_tap_conn **conns,
+static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
struct iovec (*frames)[TCP_NUM_IOVS], int num_frames)
{
int i;
@@ -264,7 +264,7 @@ static void tcp_revert_seq(struct ctx *c, struct tcp_tap_conn **conns,
* tcp_payload_flush() - Send out buffers for segments with data
* @c: Execution context
*/
-void tcp_payload_flush(struct ctx *c)
+void tcp_payload_flush(const struct ctx *c)
{
size_t m;
@@ -293,7 +293,7 @@ void tcp_payload_flush(struct ctx *c)
*
* Return: negative error code on connection reset, 0 otherwise
*/
-int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
struct tcp_flags_t *payload;
struct iovec *iov;
@@ -361,7 +361,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
* @seq: Sequence number to be sent
*/
-static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
+static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
ssize_t dlen, int no_csum, uint32_t seq)
{
struct iovec *iov;
@@ -405,7 +405,7 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
*
* #syscalls recvmsg
*/
-int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
diff --git a/tcp_buf.h b/tcp_buf.h
index 3db4c56..8d4b615 100644
--- a/tcp_buf.h
+++ b/tcp_buf.h
@@ -9,8 +9,8 @@
void tcp_sock4_iov_init(const struct ctx *c);
void tcp_sock6_iov_init(const struct ctx *c);
void tcp_flags_flush(const struct ctx *c);
-void tcp_payload_flush(struct ctx *c);
-int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
-int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
+void tcp_payload_flush(const struct ctx *c);
+int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
+int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
#endif /*TCP_BUF_H */
diff --git a/tcp_internal.h b/tcp_internal.h
index aa8bb64..bd634be 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -82,7 +82,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
conn_event_do(c, conn, event); \
} while (0)
-void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
+void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
#define tcp_rst(c, conn) \
do { \
flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \
@@ -94,7 +94,7 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
const uint16_t *check, uint32_t seq);
int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
int force_seq, struct tcp_info *tinfo);
-int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, int flags,
+int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn, int flags,
struct tcphdr *th, char *data, size_t *optlen);
#endif /* TCP_INTERNAL_H */