aboutgitcodebugslistschat
path: root/tcp_conn.h
blob: 7c450a06249cca1b83f6dc9605dd4f6adefac866 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14













                                                                 









                                                                     
                                                                      
                                                          


























                                                                                


                                                            

















































































                                                                                
                                                       











                                                                                


                                                         






























                                              











                                                                            




                                                                        
                                                                     


                                                                   
 
                       
/* SPDX-License-Identifier: AGPL-3.0-or-later
 * Copyright Red Hat
 * Author: Stefano Brivio <sbrivio@redhat.com>
 * Author: David Gibson <david@gibson.dropbear.id.au>
 *
 * TCP connection tracking data structures, used by tcp.c and
 * tcp_splice.c.  Shouldn't be included in non-TCP code.
 */
#ifndef TCP_CONN_H
#define TCP_CONN_H

#define TCP_HASH_BUCKET_BITS		(TCP_CONN_INDEX_BITS + 1)

/**
 * struct tcp_conn_common - Common fields for spliced and non-spliced
 * @spliced:		Is this a spliced connection?
 */
struct tcp_conn_common {
	bool spliced	:1;
};

extern const char *tcp_common_flag_str[];

/**
 * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
 * @c:			Fields common with tcp_splice_conn
 * @next_index:		Connection index of next item in hash chain, -1 for none
 * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
 * @sock:		Socket descriptor number
 * @events:		Connection events, implying connection states
 * @timer:		timerfd descriptor for timeout events
 * @flags:		Connection flags representing internal attributes
 * @hash_bucket:	Bucket index in connection lookup hash table
 * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
 * @ws_from_tap:	Window scaling factor advertised from tap/guest
 * @ws_to_tap:		Window scaling factor advertised to tap/guest
 * @sndbuf:		Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
 * @seq_dup_ack_approx:	Last duplicate ACK number sent to tap
 * @a.a6:		IPv6 remote address, can be IPv4-mapped
 * @a.a4.zero:		Zero prefix for IPv4-mapped, see RFC 6890, Table 20
 * @a.a4.one:		Ones prefix for IPv4-mapped
 * @a.a4.a:		IPv4 address
 * @tap_port:		Guest-facing tap port
 * @sock_port:		Remote, socket-facing port
 * @wnd_from_tap:	Last window size from tap, unscaled (as received)
 * @wnd_to_tap:		Sending window advertised to tap, unscaled (as sent)
 * @seq_to_tap:		Next sequence for packets to tap
 * @seq_ack_from_tap:	Last ACK number received from tap
 * @seq_from_tap:	Next sequence for packets from tap (not actually sent)
 * @seq_ack_to_tap:	Last ACK number sent to tap
 * @seq_init_from_tap:	Initial sequence number from tap
 */
struct tcp_tap_conn {
	/* Must be first element to match tcp_splice_conn */
	struct tcp_conn_common c;

	int	 	next_index	:TCP_CONN_INDEX_BITS + 2;

#define TCP_RETRANS_BITS		3
	unsigned int	retrans		:TCP_RETRANS_BITS;
#define TCP_MAX_RETRANS			((1U << TCP_RETRANS_BITS) - 1)

#define TCP_WS_BITS			4	/* RFC 7323 */
#define TCP_WS_MAX			14
	unsigned int	ws_from_tap	:TCP_WS_BITS;
	unsigned int	ws_to_tap	:TCP_WS_BITS;


	int		sock		:SOCKET_REF_BITS;

	uint8_t		events;
#define CLOSED			0
#define SOCK_ACCEPTED		BIT(0)	/* implies SYN sent to tap */
#define TAP_SYN_RCVD		BIT(1)	/* implies socket connecting */
#define  TAP_SYN_ACK_SENT	BIT( 3)	/* implies socket connected */
#define ESTABLISHED		BIT(2)
#define  SOCK_FIN_RCVD		BIT( 3)
#define  SOCK_FIN_SENT		BIT( 4)
#define  TAP_FIN_RCVD		BIT( 5)
#define  TAP_FIN_SENT		BIT( 6)
#define  TAP_FIN_ACKED		BIT( 7)

#define	CONN_STATE_BITS		/* Setting these clears other flags */	\
	(SOCK_ACCEPTED | TAP_SYN_RCVD | ESTABLISHED)


	int		timer		:SOCKET_REF_BITS;

	uint8_t		flags;
#define STALLED			BIT(0)
#define LOCAL			BIT(1)
#define WND_CLAMPED		BIT(2)
#define IN_EPOLL		BIT(3)
#define ACTIVE_CLOSE		BIT(4)
#define ACK_TO_TAP_DUE		BIT(5)
#define ACK_FROM_TAP_DUE	BIT(6)


	unsigned int	hash_bucket	:TCP_HASH_BUCKET_BITS;

#define TCP_MSS_BITS			14
	unsigned int	tap_mss		:TCP_MSS_BITS;
#define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
#define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))


#define SNDBUF_BITS		24
	unsigned int	sndbuf		:SNDBUF_BITS;
#define SNDBUF_SET(conn, bytes)	(conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS)))
#define SNDBUF_GET(conn)	(conn->sndbuf << (32 - SNDBUF_BITS))

	uint8_t		seq_dup_ack_approx;


	union {
		struct in6_addr a6;
		struct {
			uint8_t zero[10];
			uint8_t one[2];
			struct in_addr a;
		} a4;
	} a;

	in_port_t	tap_port;
	in_port_t	sock_port;

	uint16_t	wnd_from_tap;
	uint16_t	wnd_to_tap;

	uint32_t	seq_to_tap;
	uint32_t	seq_ack_from_tap;
	uint32_t	seq_from_tap;
	uint32_t	seq_ack_to_tap;
	uint32_t	seq_init_from_tap;
};

/**
 * struct tcp_splice_conn - Descriptor for a spliced TCP connection
 * @c:			Fields common with tcp_tap_conn
 * @a:			File descriptor number of socket for accepted connection
 * @pipe_a_b:		Pipe ends for splice() from @a to @b
 * @b:			File descriptor number of peer connected socket
 * @pipe_b_a:		Pipe ends for splice() from @b to @a
 * @events:		Events observed/actions performed on connection
 * @flags:		Connection flags (attributes, not events)
 * @a_read:		Bytes read from @a (not fully written to @b in one shot)
 * @a_written:		Bytes written to @a (not fully written from one @b read)
 * @b_read:		Bytes read from @b (not fully written to @a in one shot)
 * @b_written:		Bytes written to @b (not fully written from one @a read)
*/
struct tcp_splice_conn {
	/* Must be first element to match tcp_tap_conn */
	struct tcp_conn_common c;

	int a;
	int pipe_a_b[2];
	int b;
	int pipe_b_a[2];

	uint8_t events;
#define SPLICE_CLOSED			0
#define SPLICE_CONNECT			BIT(0)
#define SPLICE_ESTABLISHED		BIT(1)
#define A_OUT_WAIT			BIT(2)
#define B_OUT_WAIT			BIT(3)
#define A_FIN_RCVD			BIT(4)
#define B_FIN_RCVD			BIT(5)
#define A_FIN_SENT			BIT(6)
#define B_FIN_SENT			BIT(7)

	uint8_t flags;
#define SPLICE_V6			BIT(0)
#define SPLICE_IN_EPOLL			BIT(1)
#define RCVLOWAT_SET_A			BIT(2)
#define RCVLOWAT_SET_B			BIT(3)
#define RCVLOWAT_ACT_A			BIT(4)
#define RCVLOWAT_ACT_B			BIT(5)
#define CLOSING				BIT(6)

	uint32_t a_read;
	uint32_t a_written;
	uint32_t b_read;
	uint32_t b_written;
};

/**
 * union tcp_conn - Descriptor for a TCP connection (spliced or non-spliced)
 * @c:			Fields common between all variants
 * @tap:		Fields specific to non-spliced connections
 * @splice:		Fields specific to spliced connections
*/
union tcp_conn {
	struct tcp_conn_common c;
	struct tcp_tap_conn tap;
	struct tcp_splice_conn splice;
};

/* TCP connections */
extern union tcp_conn tc[];

void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new);
void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn);
void tcp_splice_timer(struct ctx *c, struct tcp_splice_conn *conn);
void tcp_splice_pipe_refill(const struct ctx *c);


#endif /* TCP_CONN_H */