diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2021-09-19 02:29:05 +0200 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2021-09-27 01:28:02 +0200 |
commit | 904b86ade7dba15f74cebde7d351920a76a82d2a (patch) | |
tree | 17245d136baf0de0288febdf372be616ecc7b6d8 /tcp.h | |
parent | 3c839bfc4687c8b36550f22a6f7ee34e08102e27 (diff) | |
download | passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.gz passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.bz2 passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.lz passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.xz passt-904b86ade7dba15f74cebde7d351920a76a82d2a.tar.zst passt-904b86ade7dba15f74cebde7d351920a76a82d2a.zip |
tcp: Rework window handling, timers, add SO_RCVLOWAT and pools for sockets/pipes
This introduces a number of fundamental changes that would be quite
messy to split. Summary:
- advertised window scaling can be as big as we want, we just need
to clamp window sizes to avoid exceeding the size of our "discard"
buffer for unacknowledged data from socket
- add macros to compare sequence numbers
- force sending ACK to guest/tap on PSH segments, always in pasta
mode, whenever we see an overlapping segment, or when we reach a
given threshold compared to our window
- we don't actually use recvmmsg() here, fix comments and label
- introduce pools for pre-opened sockets and pipes, to decrease
latency on new connections
- set receiving and sending buffer sizes to the maximum allowed,
kernel will clamp and round appropriately
- defer clean-up of spliced and non-spliced connections to timer
- in tcp_send_to_tap(), there's no need anymore to keep a large
buffer, shrink it down to what we actually need
- introduce SO_RCVLOWAT setting and activity tracking for spliced
connections, to coalesce data moved by splice() calls as much as
possible
- as we now have a compacted connection table, there's no need to
keep sparse bitmaps tracking connection activity -- simply go
through active connections with a loop in the timer handler
- always clamp the advertised window to half our sending buffer,
too, to minimise retransmissions from the guest/tap
- set TCP_QUICKACK for originating socket in spliced connections,
there's no need to delay them
- fix up timeout for unacknowledged data from socket
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'tcp.h')
-rw-r--r-- | tcp.h | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
@@ -11,8 +11,8 @@ struct ctx;
 void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      struct timespec *now);
 int tcp_tap_handler(struct ctx *c, int af, void *addr,
-		    struct tap_msg *msg, int count, struct timespec *now);
-int tcp_sock_init(struct ctx *c);
+		    struct tap_l4_msg *msg, int count, struct timespec *now);
+int tcp_sock_init(struct ctx *c, struct timespec *now);
 void tcp_timer(struct ctx *c, struct timespec *ts);
 void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
 		       uint32_t *ip_da);
@@ -45,6 +45,9 @@ union tcp_epoll_ref {
  * @port_to_tap: Ports bound host-side, packets to tap or spliced
  * @port_to_init: Ports bound namespace-side, spliced to init
  * @timer_run: Timestamp of most recent timer run
+ * @kernel_snd_wnd: Kernel reports sending window (with commit 8f7baad7f035)
+ * @pipe_size: Size of pipes for spliced connections
+ * @refill_ts: Time of last refill operation for pools of sockets/pipes
  */
 struct tcp_ctx {
 	uint64_t hash_secret[2];
@@ -53,6 +56,9 @@ struct tcp_ctx {
 	uint8_t port_to_tap [USHRT_MAX / 8];
 	uint8_t port_to_init [USHRT_MAX / 8];
 	struct timespec timer_run;
+	int kernel_snd_wnd;
+	size_t pipe_size;
+	struct timespec refill_ts;
 };
 
 #endif /* TCP_H */
```