diff options
Diffstat (limited to 'tcp_conn.h')
| -rw-r--r-- | tcp_conn.h | 134 |
1 files changed, 121 insertions, 13 deletions
@@ -12,13 +12,13 @@ /** * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced) * @f: Generic flow information - * @in_epoll: Is the connection in the epoll set? - * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT + * @retries: Number of retries occurred due to timeouts * @ws_from_tap: Window scaling factor advertised from tap/guest * @ws_to_tap: Window scaling factor advertised to tap/guest * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS * @sock: Socket descriptor number * @events: Connection events, implying connection states + * @listening_sock: Listening socket this socket was accept()ed from, or -1 * @timer: timerfd descriptor for timeout events * @flags: Connection flags representing internal attributes * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS @@ -35,11 +35,9 @@ struct tcp_tap_conn { /* Must be first element */ struct flow_common f; - bool in_epoll :1; - -#define TCP_RETRANS_BITS 3 - unsigned int retrans :TCP_RETRANS_BITS; -#define TCP_MAX_RETRANS MAX_FROM_BITS(TCP_RETRANS_BITS) +#define TCP_RETRIES_BITS 3 + unsigned int retries :TCP_RETRIES_BITS; +#define TCP_MAX_RETRIES MAX_FROM_BITS(TCP_RETRIES_BITS) #define TCP_WS_BITS 4 /* RFC 7323 */ #define TCP_WS_MAX 14 @@ -51,6 +49,15 @@ struct tcp_tap_conn { #define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS))) #define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS)) +#define RTT_EXP_BITS 4 + unsigned int rtt_exp :RTT_EXP_BITS; +#define RTT_EXP_MAX MAX_FROM_BITS(RTT_EXP_BITS) +#define RTT_STORE_MIN 100 /* us, minimum representable */ +#define RTT_STORE_MAX ((long)(RTT_STORE_MIN << RTT_EXP_MAX)) +#define RTT_SET(conn, rtt) \ + (conn->rtt_exp = MIN(RTT_EXP_MAX, ilog2(MAX(1, rtt / RTT_STORE_MIN)))) +#define RTT_GET(conn) (RTT_STORE_MIN << conn->rtt_exp) + int sock :FD_REF_BITS; uint8_t events; @@ -68,6 +75,7 @@ struct tcp_tap_conn { #define CONN_STATE_BITS /* Setting these clears other flags */ \ (SOCK_ACCEPTED | TAP_SYN_RCVD | ESTABLISHED) + int listening_sock; int timer :FD_REF_BITS; @@ -78,6 +86,7 @@ struct tcp_tap_conn { #define ACK_TO_TAP_DUE BIT(3) #define ACK_FROM_TAP_DUE BIT(4) #define ACK_FROM_TAP_BLOCKS BIT(5) +#define SYN_RETRIED BIT(6) #define SNDBUF_BITS 24 unsigned int sndbuf :SNDBUF_BITS; @@ -97,6 +106,95 @@ struct tcp_tap_conn { }; /** + * struct tcp_tap_transfer - Migrated TCP data, flow table part, network order + * @pif: Interfaces for each side of the flow + * @side: Addresses and ports for each side of the flow + * @retries: Number of retries occurred due to timeouts + * @ws_from_tap: Window scaling factor advertised from tap/guest + * @ws_to_tap: Window scaling factor advertised to tap/guest + * @events: Connection events, implying connection states + * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS + * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS + * @flags: Connection flags representing internal attributes + * @seq_dup_ack_approx: Last duplicate ACK number sent to tap + * @wnd_from_tap: Last window size from tap, unscaled (as received) + * @wnd_to_tap: Sending window advertised to tap, unscaled (as sent) + * @seq_to_tap: Next sequence for packets to tap + * @seq_ack_from_tap: Last ACK number received from tap + * @seq_from_tap: Next sequence for packets from tap (not actually sent) + * @seq_ack_to_tap: Last ACK number sent to tap + * @seq_init_from_tap: Initial sequence number from tap +*/ +struct tcp_tap_transfer { + uint8_t pif[SIDES]; + struct flowside side[SIDES]; + + uint8_t retries; + uint8_t ws_from_tap; + uint8_t ws_to_tap; + uint8_t events; + + uint32_t tap_mss; + + uint32_t sndbuf; + + uint8_t flags; + uint8_t seq_dup_ack_approx; + + uint16_t wnd_from_tap; + uint16_t wnd_to_tap; + + uint32_t seq_to_tap; + uint32_t seq_ack_from_tap; + uint32_t seq_from_tap; + uint32_t seq_ack_to_tap; + uint32_t seq_init_from_tap; +} __attribute__((packed, aligned(__alignof__(uint32_t)))); + +/** + * struct tcp_tap_transfer_ext - Migrated TCP data, outside flow, network order + * @seq_snd: Socket-side send sequence + * @seq_rcv: Socket-side receive sequence + * @sndq: Length of pending send queue (unacknowledged / not sent) + * @notsent: Part of pending send queue that wasn't sent out yet + * @rcvq: Length of pending receive queue + * @mss: Socket-side MSS clamp + * @timestamp: RFC 7323 timestamp + * @snd_wl1: Next sequence used in window probe (next sequence - 1) + * @snd_wnd: Socket-side sending window + * @max_window: Window clamp + * @rcv_wnd: Socket-side receive window + * @rcv_wup: rcv_nxt on last window update sent + * @snd_ws: Window scaling factor, send + * @rcv_ws: Window scaling factor, receive + * @tcpi_state: Connection state in TCP_INFO style (enum, tcp_states.h) + * @tcpi_options: TCPI_OPT_* constants (timestamps, selective ACK) + */ +struct tcp_tap_transfer_ext { + uint32_t seq_snd; + uint32_t seq_rcv; + + uint32_t sndq; + uint32_t notsent; + uint32_t rcvq; + + uint32_t mss; + uint32_t timestamp; + + /* We can't just use struct tcp_repair_window: we need network order */ + uint32_t snd_wl1; + uint32_t snd_wnd; + uint32_t max_window; + uint32_t rcv_wnd; + uint32_t rcv_wup; + + uint8_t snd_ws; + uint8_t rcv_ws; + uint8_t tcpi_state; + uint8_t tcpi_options; +} __attribute__((packed, aligned(__alignof__(uint32_t)))); + +/** * struct tcp_splice_conn - Descriptor for a spliced TCP connection * @f: Generic flow information * @s: File descriptor for sockets @@ -105,7 +203,6 @@ struct tcp_tap_conn { * @written: Bytes written (not fully written from one other side read) * @events: Events observed/actions performed on connection * @flags: Connection flags (attributes, not events) - * @in_epoll: Is the connection in the epoll set? */ struct tcp_splice_conn { /* Must be first element */ @@ -129,8 +226,6 @@ struct tcp_splice_conn { #define RCVLOWAT_SET(sidei_) ((sidei_) ? BIT(1) : BIT(0)) #define RCVLOWAT_ACT(sidei_) ((sidei_) ? BIT(3) : BIT(2)) #define CLOSING BIT(4) - - bool in_epoll :1; }; /* Socket pools */ @@ -140,11 +235,24 @@ extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE]; extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE]; bool tcp_flow_defer(const struct tcp_tap_conn *conn); + +int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn); +int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn); + +int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn); +int tcp_flow_migrate_source_ext(const struct ctx *c, int fd, + const struct tcp_tap_conn *conn); + +int tcp_flow_migrate_target(struct ctx *c, int fd); +int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd); + +bool tcp_flow_is_established(const struct tcp_tap_conn *conn); + bool tcp_splice_flow_defer(struct tcp_splice_conn *conn); -void tcp_splice_timer(const struct ctx *c, struct tcp_splice_conn *conn); +void tcp_splice_timer(struct tcp_splice_conn *conn); int tcp_conn_pool_sock(int pool[]); -int tcp_conn_sock(const struct ctx *c, sa_family_t af); -int tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af); +int tcp_conn_sock(sa_family_t af); +int tcp_sock_refill_pool(int pool[], sa_family_t af); void tcp_splice_refill(const struct ctx *c); #endif /* TCP_CONN_H */ |
