aboutgitcodebugslistschat
path: root/tcp.h
diff options
context:
space:
mode:
author: Stefano Brivio <sbrivio@redhat.com> 2021-09-19 02:29:05 +0200
committer: Stefano Brivio <sbrivio@redhat.com> 2021-09-27 01:28:02 +0200
commit: 904b86ade7dba15f74cebde7d351920a76a82d2a (patch)
tree: 17245d136baf0de0288febdf372be616ecc7b6d8 /tcp.h
parent: 3c839bfc4687c8b36550f22a6f7ee34e08102e27 (diff)
downloadpasst-904b86ade7dba15f74cebde7d351920a76a82d2a.tar
passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.gz
passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.bz2
passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.lz
passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.xz
passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.zst
passt-904b86ade7dba15f74cebde7d351920a76a82d2a.zip
tcp: Rework window handling, timers, add SO_RCVLOWAT and pools for sockets/pipes
This introduces a number of fundamental changes that would be quite messy to split. Summary:

- advertised window scaling can be as big as we want, we just need to clamp window sizes to avoid exceeding the size of our "discard" buffer for unacknowledged data from socket
- add macros to compare sequence numbers
- force sending ACK to guest/tap on PSH segments, always in pasta mode, whenever we see an overlapping segment, or when we reach a given threshold compared to our window
- we don't actually use recvmmsg() here, fix comments and label
- introduce pools for pre-opened sockets and pipes, to decrease latency on new connections
- set receiving and sending buffer sizes to the maximum allowed, kernel will clamp and round appropriately
- defer clean-up of spliced and non-spliced connection to timer
- in tcp_send_to_tap(), there's no need anymore to keep a large buffer, shrink it down to what we actually need
- introduce SO_RCVLOWAT setting and activity tracking for spliced connections, to coalesce data moved by splice() calls as much as possible
- as we now have a compacted connection table, there's no need to keep sparse bitmaps tracking connection activity -- simply go through active connections with a loop in the timer handler
- always clamp the advertised window to half our sending buffer, too, to minimise retransmissions from the guest/tap
- set TCP_QUICKACK for originating socket in spliced connections, there's no need to delay them
- fix up timeout for unacknowledged data from socket

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'tcp.h')
-rw-r--r--  tcp.h  10
1 files changed, 8 insertions, 2 deletions
diff --git a/tcp.h b/tcp.h
index 359414c..ae983ed 100644
--- a/tcp.h
+++ b/tcp.h
@@ -11,8 +11,8 @@ struct ctx;
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
struct timespec *now);
int tcp_tap_handler(struct ctx *c, int af, void *addr,
- struct tap_msg *msg, int count, struct timespec *now);
-int tcp_sock_init(struct ctx *c);
+ struct tap_l4_msg *msg, int count, struct timespec *now);
+int tcp_sock_init(struct ctx *c, struct timespec *now);
void tcp_timer(struct ctx *c, struct timespec *ts);
void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
uint32_t *ip_da);
@@ -45,6 +45,9 @@ union tcp_epoll_ref {
* @port_to_tap: Ports bound host-side, packets to tap or spliced
* @port_to_init: Ports bound namespace-side, spliced to init
* @timer_run: Timestamp of most recent timer run
+ * @kernel_snd_wnd: Kernel reports sending window (with commit 8f7baad7f035)
+ * @pipe_size: Size of pipes for spliced connections
+ * @refill_ts: Time of last refill operation for pools of sockets/pipes
*/
struct tcp_ctx {
uint64_t hash_secret[2];
@@ -53,6 +56,9 @@ struct tcp_ctx {
uint8_t port_to_tap [USHRT_MAX / 8];
uint8_t port_to_init [USHRT_MAX / 8];
struct timespec timer_run;
+ int kernel_snd_wnd;
+ size_t pipe_size;
+ struct timespec refill_ts;
};
#endif /* TCP_H */