aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--tcp.c31
-rw-r--r--tcp_conn.h9
-rw-r--r--util.c14
-rw-r--r--util.h1
4 files changed, 48 insertions, 7 deletions
diff --git a/tcp.c b/tcp.c
index 28d3304..4a886ae 100644
--- a/tcp.c
+++ b/tcp.c
@@ -202,9 +202,13 @@
* - ACT_TIMEOUT, in the presence of any event: if no activity is detected on
* either side, the connection is reset
*
- * - ACK_INTERVAL elapsed after data segment received from tap without having
+ * - RTT / 2 elapsed after data segment received from tap without having
* sent an ACK segment, or zero-sized window advertised to tap/guest (flag
- * ACK_TO_TAP_DUE): forcibly check if an ACK segment can be sent
+ * ACK_TO_TAP_DUE): forcibly check if an ACK segment can be sent.
+ *
+ * RTT, here, is the value reported by the kernel via TCP_INFO, rounded down to
+ * RTT_STORE_MIN (100 us) times a power of two, and clamped between
+ * RTT_STORE_MIN and RTT_STORE_MAX (3276.8 ms). The timeout is clamped likewise.
*
*
* Summary of data flows (with ESTABLISHED event)
@@ -341,7 +345,6 @@ enum {
#define MSS_DEFAULT 536
#define WINDOW_DEFAULT 14600 /* RFC 6928 */
-#define ACK_INTERVAL 10 /* ms */
#define RTO_INIT 1 /* s, RFC 6298 */
#define RTO_INIT_AFTER_SYN_RETRIES 3 /* s, RFC 6298 */
#define FIN_TIMEOUT 60
@@ -593,7 +596,9 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
}
if (conn->flags & ACK_TO_TAP_DUE) {
- it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
+ it.it_value.tv_sec = RTT_GET(conn) / 2 / ((long)1000 * 1000);
+ it.it_value.tv_nsec = RTT_GET(conn) / 2 % ((long)1000 * 1000) *
+ 1000;
} else if (conn->flags & ACK_FROM_TAP_DUE) {
int exp = conn->retries, timeout = RTO_INIT;
if (!(conn->events & ESTABLISHED))
@@ -608,9 +613,17 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
it.it_value.tv_sec = ACT_TIMEOUT;
}
- flow_dbg(conn, "timer expires in %llu.%03llus",
- (unsigned long long)it.it_value.tv_sec,
- (unsigned long long)it.it_value.tv_nsec / 1000 / 1000);
+ if (conn->flags & ACK_TO_TAP_DUE) {
+ /* Whole milliseconds, then microseconds as three-digit fraction */
+ flow_trace(conn, "timer expires in %llu.%03llums",
+ (unsigned long long)it.it_value.tv_sec * 1000 +
+ (unsigned long long)it.it_value.tv_nsec /
+ ((long)1000 * 1000),
+ (unsigned long long)it.it_value.tv_nsec / 1000 % 1000);
+ } else {
+ flow_dbg(conn, "timer expires in %llu.%03llus",
+ (unsigned long long)it.it_value.tv_sec,
+ (unsigned long long)it.it_value.tv_nsec / 1000 / 1000);
+ }
if (timerfd_settime(conn->timer, 0, &it, NULL))
flow_perror(conn, "failed to set timer");
@@ -1144,6 +1157,10 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
conn_flag(c, conn, ACK_TO_TAP_DUE);
out:
+ /* Opportunistically store RTT approximation on valid TCP_INFO data */
+ if (tinfo)
+ RTT_SET(conn, tinfo->tcpi_rtt);
+
return new_wnd_to_tap != prev_wnd_to_tap ||
conn->seq_ack_to_tap != prev_ack_to_tap;
}
diff --git a/tcp_conn.h b/tcp_conn.h
index e36910c..9c6ff9e 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -49,6 +49,15 @@ struct tcp_tap_conn {
#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS))
+#define RTT_EXP_BITS 4
+ unsigned int rtt_exp :RTT_EXP_BITS;
+#define RTT_EXP_MAX MAX_FROM_BITS(RTT_EXP_BITS)
+#define RTT_STORE_MIN 100 /* us, minimum representable */
+#define RTT_STORE_MAX ((long)(RTT_STORE_MIN << RTT_EXP_MAX))
+/* RTT is stored as an exponent: value in us is RTT_STORE_MIN << rtt_exp */
+#define RTT_SET(conn, rtt) ((conn)->rtt_exp = \
+ MIN(RTT_EXP_MAX, ilog2(MAX(1, (rtt) / RTT_STORE_MIN))))
+#define RTT_GET(conn) (RTT_STORE_MIN << (conn)->rtt_exp)
+
int sock :FD_REF_BITS;
uint8_t events;
diff --git a/util.c b/util.c
index 2232a24..bfeb619 100644
--- a/util.c
+++ b/util.c
@@ -614,6 +614,9 @@ int __daemon(int pidfile_fd, int devnull_fd)
* fls() - Find last (most significant) bit set in word
* @x: Word
*
+ * Note: unlike ffs() and other implementations of fls(), notably the one from
+ * the Linux kernel, the starting position is 0 and not 1, that is, fls(1) = 0.
+ *
* Return: position of most significant bit set, starting from 0, -1 if none
*/
int fls(unsigned long x)
@@ -630,6 +633,17 @@ int fls(unsigned long x)
}
/**
+ * ilog2() - Integral part (floor) of binary logarithm (logarithm to the base 2)
+ * @x: Argument
+ *
+ * Return: floor(log2(@x)), as given by 0-based fls(); -1 (undefined) if @x is 0
+ */
+int ilog2(unsigned long x)
+{
+ return fls(x);
+}
+
+/**
* write_file() - Replace contents of file with a string
* @path: File to write
* @buf: String to write
diff --git a/util.h b/util.h
index 744880b..f7a941f 100644
--- a/util.h
+++ b/util.h
@@ -233,6 +233,7 @@ int output_file_open(const char *path, int flags);
void pidfile_write(int fd, pid_t pid);
int __daemon(int pidfile_fd, int devnull_fd);
int fls(unsigned long x);
+int ilog2(unsigned long x);
int write_file(const char *path, const char *buf);
intmax_t read_file_integer(const char *path, intmax_t fallback);
int write_all_buf(int fd, const void *buf, size_t len);