From 49631a38a6ec550fb9254f2f9e4a049eea02ed6d Mon Sep 17 00:00:00 2001
From: Stefano Brivio
Date: Wed, 21 Jul 2021 17:44:39 +0200
Subject: tcp, udp: Split IPv4 and IPv6 bound port sets

Allow binding IPv4 and IPv6 ports to tap, namespace or init separately.

Port numbers of TCP ports that are bound in a namespace are also bound
for UDP for convenience (e.g. iperf3), and IPv4 ports are always bound
if the corresponding IPv6 port is bound (socket might not have the
IPV6_V6ONLY option set). This will also be configurable later.

Signed-off-by: Stefano Brivio
---
 passt.c | 46 +++++++++++++++++++++++++++++--------------
 tcp.c   | 61 ++++++++++++++++++++++++++++++++++++----------------------
 tcp.h   | 18 +++++++++++------
 udp.c   | 68 +++++++++++++++++++++++++++++++++++++----------------------------
 udp.h   | 17 +++++++++++------
 5 files changed, 131 insertions(+), 79 deletions(-)

diff --git a/passt.c b/passt.c index b0fcbc0..f6bfa96 100644 --- a/passt.c +++ b/passt.c @@ -326,13 +326,21 @@ static int get_bound_ports_ns(void *arg) ns_enter(c->pasta_pid); if (c->v4) { - procfs_scan_listen("tcp", c->tcp.port_to_ns); - procfs_scan_listen("udp", c->udp.port_to_ns); + procfs_scan_listen("tcp", c->tcp.port4_to_tap); + procfs_scan_listen("tcp", c->udp.port4_to_tap); + procfs_scan_listen("udp", c->udp.port4_to_tap); } if (c->v6) { - procfs_scan_listen("tcp6", c->tcp.port_to_ns); - procfs_scan_listen("udp6", c->udp.port_to_ns); + if (c->v4) { + procfs_scan_listen("tcp6", c->tcp.port4_to_ns); + procfs_scan_listen("tcp6", c->udp.port4_to_ns); + procfs_scan_listen("udp6", c->udp.port4_to_ns); + } + + procfs_scan_listen("tcp6", c->tcp.port6_to_ns); + procfs_scan_listen("tcp6", c->udp.port6_to_ns); + procfs_scan_listen("udp6", c->udp.port6_to_ns); } return 0; @@ -346,23 +354,23 @@ static void get_bound_ports(struct ctx *c) { char ns_fn_stack[NS_FN_STACK_SIZE]; - if (c->mode == MODE_PASST) { - memset(c->tcp.port_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); - memset(c->udp.port_to_tap, 0xff, 
PORT_EPHEMERAL_MIN / 8); - return; - } - clone(get_bound_ports_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, (void *)c); if (c->v4) { - procfs_scan_listen("tcp", c->tcp.port_to_init); - procfs_scan_listen("udp", c->udp.port_to_init); + procfs_scan_listen("tcp", c->tcp.port4_to_init); + procfs_scan_listen("udp", c->udp.port4_to_init); } if (c->v6) { - procfs_scan_listen("tcp6", c->tcp.port_to_init); - procfs_scan_listen("udp6", c->udp.port_to_init); + if (c->v4) { + procfs_scan_listen("tcp6", c->tcp.port4_to_init); + procfs_scan_listen("udp6", c->udp.port4_to_init); + } + + procfs_scan_listen("tcp6", c->tcp.port6_to_init); + procfs_scan_listen("udp6", c->udp.port6_to_init); + } } @@ -509,7 +517,15 @@ int main(int argc, char **argv) get_routes(&c); get_addrs(&c); get_dns(&c); - get_bound_ports(&c); + + if (c.mode == MODE_PASST) { + memset(&c.tcp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); + memset(&c.tcp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); + memset(&c.udp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); + memset(&c.udp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8); + } else { + get_bound_ports(&c); + } proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4); diff --git a/tcp.c b/tcp.c index efcf466..d015b48 100644 --- a/tcp.c +++ b/tcp.c @@ -1770,7 +1770,8 @@ static int tcp_splice_new(struct ctx *c, struct tcp_splice_conn *conn, struct tcp_splice_connect_ns_arg ns_arg = { c, conn, v6, port, 0 }; char ns_fn_stack[NS_FN_STACK_SIZE]; - if (bitmap_isset(c->tcp.port_to_ns, port)) { + if ((!v6 && bitmap_isset(c->tcp.port4_to_ns, port)) || + (v6 && bitmap_isset(c->tcp.port6_to_ns, port))) { clone(tcp_splice_connect_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2, CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, @@ -2082,19 +2083,24 @@ static int tcp_sock_init_ns(void *arg) ns_enter(c->pasta_pid); - for (port = 0; !PORT_IS_EPHEMERAL(port); port++) { - if (!bitmap_isset(c->tcp.port_to_init, port)) - continue; + if (c->v4) { + tref.v6 = 0; + 
for (port = 0; port < USHRT_MAX; port++) { + if (!bitmap_isset(c->tcp.port4_to_init, port)) + continue; - tref.index = port; - - if (c->v4) { - tref.v6 = 0; + tref.index = port; sock_l4(c, AF_INET, IPPROTO_TCP, port, 1, tref.u32); } + } + + if (c->v6) { + tref.v6 = 1; + for (port = 0; port < USHRT_MAX; port++) { + if (!bitmap_isset(c->tcp.port6_to_init, port)) + continue; - if (c->v6) { - tref.v6 = 1; + tref.index = port; sock_l4(c, AF_INET6, IPPROTO_TCP, port, 1, tref.u32); } } @@ -2116,24 +2122,33 @@ int tcp_sock_init(struct ctx *c) getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); - for (port = 0; !PORT_IS_EPHEMERAL(port); port++) { - if (bitmap_isset(c->tcp.port_to_ns, port)) - tref.splice = 1; - else if (bitmap_isset(c->tcp.port_to_tap, port)) - tref.splice = 0; - else - continue; - - tref.index = port; + if (c->v4) { + tref.v6 = 0; + for (port = 0; port < USHRT_MAX; port++) { + if (bitmap_isset(c->tcp.port4_to_ns, port)) + tref.splice = 1; + else if (bitmap_isset(c->tcp.port4_to_tap, port)) + tref.splice = 0; + else + continue; - if (c->v4) { - tref.v6 = 0; + tref.index = port; sock_l4(c, AF_INET, IPPROTO_TCP, port, tref.splice, tref.u32); } + } + + if (c->v6) { + tref.v6 = 1; + for (port = 0; port < USHRT_MAX; port++) { + if (bitmap_isset(c->tcp.port6_to_ns, port)) + tref.splice = 1; + else if (bitmap_isset(c->tcp.port6_to_tap, port)) + tref.splice = 0; + else + continue; - if (c->v6) { - tref.v6 = 1; + tref.index = port; sock_l4(c, AF_INET6, IPPROTO_TCP, port, tref.splice, tref.u32); } diff --git a/tcp.h b/tcp.h index 6a9aa4a..52f4bb0 100644 --- a/tcp.h +++ b/tcp.h @@ -38,18 +38,24 @@ union tcp_epoll_ref { * @hash_secret: 128-bit secret for hash functions, ISN and hash table * @tap_conn_count: Count of tap connections in connection table * @splice_conn_count: Count of spliced connections in connection table - * @port_to_tap: Ports bound host/init-side, packets to guest/tap - * @port_to_init: Ports bound namespace-side, spliced to init 
- * @port_to_ns: Ports bound init-side, spliced to namespace + * @port4_to_tap: IPv4 ports bound host/init-side, packets to guest/tap + * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap + * @port4_to_init: IPv4 ports bound namespace-side, spliced to init + * @port6_to_init: IPv6 ports bound namespace-side, spliced to init + * @port4_to_ns: IPv4 ports bound init-side, spliced to namespace + * @port6_to_ns: IPv6 ports bound init-side, spliced to namespace * @timer_run: Timestamp of most recent timer run */ struct tcp_ctx { uint64_t hash_secret[2]; int tap_conn_count; int splice_conn_count; - uint8_t port_to_tap [USHRT_MAX / 8]; - uint8_t port_to_init [USHRT_MAX / 8]; - uint8_t port_to_ns [USHRT_MAX / 8]; + uint8_t port4_to_tap [USHRT_MAX / 8]; + uint8_t port6_to_tap [USHRT_MAX / 8]; + uint8_t port4_to_init [USHRT_MAX / 8]; + uint8_t port6_to_init [USHRT_MAX / 8]; + uint8_t port4_to_ns [USHRT_MAX / 8]; + uint8_t port6_to_ns [USHRT_MAX / 8]; struct timespec timer_run; }; diff --git a/udp.c b/udp.c index 3eb2ecf..30659e0 100644 --- a/udp.c +++ b/udp.c @@ -890,19 +890,24 @@ int udp_sock_init_ns(void *arg) ns_enter(c->pasta_pid); - for (port = 0; port < USHRT_MAX; port++) { - if (!bitmap_isset(c->udp.port_to_init, port)) - continue; - - uref.port = port; + if (c->v4) { + uref.v6 = 0; + for (port = 0; port < USHRT_MAX; port++) { + if (!bitmap_isset(c->udp.port4_to_init, port)) + continue; - if (c->v4) { - uref.v6 = 0; + uref.port = port; sock_l4(c, AF_INET, IPPROTO_UDP, port, 1, uref.u32); } + } + + if (c->v6) { + uref.v6 = 1; + for (port = 0; port < USHRT_MAX; port++) { + if (!bitmap_isset(c->udp.port6_to_init, port)) + continue; - if (c->v6) { - uref.v6 = 1; + uref.port = port; sock_l4(c, AF_INET6, IPPROTO_UDP, port, 1, uref.u32); } } @@ -976,40 +981,45 @@ int udp_sock_init(struct ctx *c) in_port_t port; int s; - for (port = 0; port < USHRT_MAX; port++) { - if (bitmap_isset(c->udp.port_to_ns, port)) - uref.splice = UDP_TO_NS; - else if 
(bitmap_isset(c->udp.port_to_tap, port)) - uref.splice = 0; - else - continue; - - uref.port = port; + if (c->v4) { + uref.v6 = 0; + for (port = 0; port < USHRT_MAX; port++) { + if (bitmap_isset(c->udp.port4_to_ns, port)) + uref.splice = UDP_TO_NS; + else if (bitmap_isset(c->udp.port4_to_tap, port)) + uref.splice = 0; + else + continue; - if (c->v4) { - uref.v6 = 0; + uref.port = port; s = sock_l4(c, AF_INET, IPPROTO_UDP, port, uref.splice == UDP_TO_NS, uref.u32); - if (!uref.splice && s > 0) udp_tap_map[V4][port].sock = s; } - if (c->v6) { - uref.v6 = 1; + udp_sock4_iov_init(); + } + + if (c->v6) { + uref.v6 = 1; + for (port = 0; port < USHRT_MAX; port++) { + if (bitmap_isset(c->udp.port6_to_ns, port)) + uref.splice = UDP_TO_NS; + else if (bitmap_isset(c->udp.port6_to_tap, port)) + uref.splice = 0; + else + continue; + + uref.port = port; s = sock_l4(c, AF_INET6, IPPROTO_UDP, port, uref.splice == UDP_TO_NS, uref.u32); - if (!uref.splice && s > 0) udp_tap_map[V6][port].sock = s; } - } - - if (c->v4) - udp_sock4_iov_init(); - if (c->v6) udp_sock6_iov_init(); + } if (c->mode == MODE_PASTA) { udp_splice_iov_init(); diff --git a/udp.h b/udp.h index 12a28dd..0b2ef22 100644 --- a/udp.h +++ b/udp.h @@ -38,15 +38,20 @@ union udp_epoll_ref { /** * struct udp_ctx - Execution context for UDP - * @port_to_tap: Ports bound host/init-side, packets to guest/tap - * @port_to_init: Ports bound namespace-side, spliced to init - * @port_to_ns: Ports bound init-side, spliced to namespace + * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap + * @port4_to_init: IPv4 ports bound namespace-side, spliced to init + * @port6_to_init: IPv6 ports bound namespace-side, spliced to init + * @port4_to_ns: IPv4 ports bound init-side, spliced to namespace + * @port6_to_ns: IPv6 ports bound init-side, spliced to namespace * @timer_run: Timestamp of most recent timer run */ struct udp_ctx { - uint8_t port_to_tap [USHRT_MAX / 8]; - uint8_t port_to_init [USHRT_MAX / 8]; - uint8_t 
port_to_ns [USHRT_MAX / 8]; + uint8_t port4_to_tap [USHRT_MAX / 8]; + uint8_t port6_to_tap [USHRT_MAX / 8]; + uint8_t port4_to_init [USHRT_MAX / 8]; + uint8_t port6_to_init [USHRT_MAX / 8]; + uint8_t port4_to_ns [USHRT_MAX / 8]; + uint8_t port6_to_ns [USHRT_MAX / 8]; struct timespec timer_run; }; -- cgit v1.2.3