aboutgitcodebugslistschat
path: root/tcp_conn.h
diff options
context:
space:
mode:
Diffstat (limited to 'tcp_conn.h')
-rw-r--r--tcp_conn.h134
1 files changed, 121 insertions, 13 deletions
diff --git a/tcp_conn.h b/tcp_conn.h
index d342680..9c6ff9e 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -12,13 +12,13 @@
/**
* struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
* @f: Generic flow information
- * @in_epoll: Is the connection in the epoll set?
- * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT
+ * @retries: Number of retries occurred due to timeouts
* @ws_from_tap: Window scaling factor advertised from tap/guest
* @ws_to_tap: Window scaling factor advertised to tap/guest
* @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
* @sock: Socket descriptor number
* @events: Connection events, implying connection states
+ * @listening_sock: Listening socket this socket was accept()ed from, or -1
* @timer: timerfd descriptor for timeout events
* @flags: Connection flags representing internal attributes
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
@@ -35,11 +35,9 @@ struct tcp_tap_conn {
/* Must be first element */
struct flow_common f;
- bool in_epoll :1;
-
-#define TCP_RETRANS_BITS 3
- unsigned int retrans :TCP_RETRANS_BITS;
-#define TCP_MAX_RETRANS MAX_FROM_BITS(TCP_RETRANS_BITS)
+#define TCP_RETRIES_BITS 3
+ unsigned int retries :TCP_RETRIES_BITS;
+#define TCP_MAX_RETRIES MAX_FROM_BITS(TCP_RETRIES_BITS)
#define TCP_WS_BITS 4 /* RFC 7323 */
#define TCP_WS_MAX 14
@@ -51,6 +49,15 @@ struct tcp_tap_conn {
#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS))
+#define RTT_EXP_BITS 4
+ unsigned int rtt_exp :RTT_EXP_BITS;
+#define RTT_EXP_MAX MAX_FROM_BITS(RTT_EXP_BITS)
+#define RTT_STORE_MIN 100 /* us, minimum representable */
+#define RTT_STORE_MAX ((long)(RTT_STORE_MIN << RTT_EXP_MAX))
+#define RTT_SET(conn, rtt) \
+ (conn->rtt_exp = MIN(RTT_EXP_MAX, ilog2(MAX(1, rtt / RTT_STORE_MIN))))
+#define RTT_GET(conn) (RTT_STORE_MIN << conn->rtt_exp)
+
int sock :FD_REF_BITS;
uint8_t events;
@@ -68,6 +75,7 @@ struct tcp_tap_conn {
#define CONN_STATE_BITS /* Setting these clears other flags */ \
(SOCK_ACCEPTED | TAP_SYN_RCVD | ESTABLISHED)
+ int listening_sock;
int timer :FD_REF_BITS;
@@ -78,6 +86,7 @@ struct tcp_tap_conn {
#define ACK_TO_TAP_DUE BIT(3)
#define ACK_FROM_TAP_DUE BIT(4)
#define ACK_FROM_TAP_BLOCKS BIT(5)
+#define SYN_RETRIED BIT(6)
#define SNDBUF_BITS 24
unsigned int sndbuf :SNDBUF_BITS;
@@ -97,6 +106,95 @@ struct tcp_tap_conn {
};
/**
+ * struct tcp_tap_transfer - Migrated TCP data, flow table part, network order
+ * @pif: Interfaces for each side of the flow
+ * @side: Addresses and ports for each side of the flow
+ * @retries: Number of retries occurred due to timeouts
+ * @ws_from_tap: Window scaling factor advertised from tap/guest
+ * @ws_to_tap: Window scaling factor advertised to tap/guest
+ * @events: Connection events, implying connection states
+ * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
+ * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
+ * @flags: Connection flags representing internal attributes
+ * @seq_dup_ack_approx: Last duplicate ACK number sent to tap
+ * @wnd_from_tap: Last window size from tap, unscaled (as received)
+ * @wnd_to_tap: Sending window advertised to tap, unscaled (as sent)
+ * @seq_to_tap: Next sequence for packets to tap
+ * @seq_ack_from_tap: Last ACK number received from tap
+ * @seq_from_tap: Next sequence for packets from tap (not actually sent)
+ * @seq_ack_to_tap: Last ACK number sent to tap
+ * @seq_init_from_tap: Initial sequence number from tap
+*/
+struct tcp_tap_transfer {
+ uint8_t pif[SIDES];
+ struct flowside side[SIDES];
+
+ uint8_t retries;
+ uint8_t ws_from_tap;
+ uint8_t ws_to_tap;
+ uint8_t events;
+
+ uint32_t tap_mss;
+
+ uint32_t sndbuf;
+
+ uint8_t flags;
+ uint8_t seq_dup_ack_approx;
+
+ uint16_t wnd_from_tap;
+ uint16_t wnd_to_tap;
+
+ uint32_t seq_to_tap;
+ uint32_t seq_ack_from_tap;
+ uint32_t seq_from_tap;
+ uint32_t seq_ack_to_tap;
+ uint32_t seq_init_from_tap;
+} __attribute__((packed, aligned(__alignof__(uint32_t))));
+
+/**
+ * struct tcp_tap_transfer_ext - Migrated TCP data, outside flow, network order
+ * @seq_snd: Socket-side send sequence
+ * @seq_rcv: Socket-side receive sequence
+ * @sndq: Length of pending send queue (unacknowledged / not sent)
+ * @notsent: Part of pending send queue that wasn't sent out yet
+ * @rcvq: Length of pending receive queue
+ * @mss: Socket-side MSS clamp
+ * @timestamp: RFC 7323 timestamp
+ * @snd_wl1: Next sequence used in window probe (next sequence - 1)
+ * @snd_wnd: Socket-side sending window
+ * @max_window: Window clamp
+ * @rcv_wnd: Socket-side receive window
+ * @rcv_wup: rcv_nxt on last window update sent
+ * @snd_ws: Window scaling factor, send
+ * @rcv_ws: Window scaling factor, receive
+ * @tcpi_state: Connection state in TCP_INFO style (enum, tcp_states.h)
+ * @tcpi_options: TCPI_OPT_* constants (timestamps, selective ACK)
+ */
+struct tcp_tap_transfer_ext {
+ uint32_t seq_snd;
+ uint32_t seq_rcv;
+
+ uint32_t sndq;
+ uint32_t notsent;
+ uint32_t rcvq;
+
+ uint32_t mss;
+ uint32_t timestamp;
+
+ /* We can't just use struct tcp_repair_window: we need network order */
+ uint32_t snd_wl1;
+ uint32_t snd_wnd;
+ uint32_t max_window;
+ uint32_t rcv_wnd;
+ uint32_t rcv_wup;
+
+ uint8_t snd_ws;
+ uint8_t rcv_ws;
+ uint8_t tcpi_state;
+ uint8_t tcpi_options;
+} __attribute__((packed, aligned(__alignof__(uint32_t))));
+
+/**
* struct tcp_splice_conn - Descriptor for a spliced TCP connection
* @f: Generic flow information
* @s: File descriptor for sockets
@@ -105,7 +203,6 @@ struct tcp_tap_conn {
* @written: Bytes written (not fully written from one other side read)
* @events: Events observed/actions performed on connection
* @flags: Connection flags (attributes, not events)
- * @in_epoll: Is the connection in the epoll set?
*/
struct tcp_splice_conn {
/* Must be first element */
@@ -129,8 +226,6 @@ struct tcp_splice_conn {
#define RCVLOWAT_SET(sidei_) ((sidei_) ? BIT(1) : BIT(0))
#define RCVLOWAT_ACT(sidei_) ((sidei_) ? BIT(3) : BIT(2))
#define CLOSING BIT(4)
-
- bool in_epoll :1;
};
/* Socket pools */
@@ -140,11 +235,24 @@ extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE];
extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE];
bool tcp_flow_defer(const struct tcp_tap_conn *conn);
+
+int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn);
+int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn);
+
+int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn);
+int tcp_flow_migrate_source_ext(const struct ctx *c, int fd,
+ const struct tcp_tap_conn *conn);
+
+int tcp_flow_migrate_target(struct ctx *c, int fd);
+int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd);
+
+bool tcp_flow_is_established(const struct tcp_tap_conn *conn);
+
bool tcp_splice_flow_defer(struct tcp_splice_conn *conn);
-void tcp_splice_timer(const struct ctx *c, struct tcp_splice_conn *conn);
+void tcp_splice_timer(struct tcp_splice_conn *conn);
int tcp_conn_pool_sock(int pool[]);
-int tcp_conn_sock(const struct ctx *c, sa_family_t af);
-int tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af);
+int tcp_conn_sock(sa_family_t af);
+int tcp_sock_refill_pool(int pool[], sa_family_t af);
void tcp_splice_refill(const struct ctx *c);
#endif /* TCP_CONN_H */