diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2021-07-26 14:10:29 +0200 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2021-07-26 14:10:29 +0200 |
commit | 86b273150a47c6f5783db865d1385675f5c4e5a6 (patch) | |
tree | bc0011bc64f00a519817eb7b74a7d20664f8e3fb | |
parent | f4aaa471a1d304b0b6c767ef4b2fb88b45c02ef1 (diff) | |
download | passt-86b273150a47c6f5783db865d1385675f5c4e5a6.tar passt-86b273150a47c6f5783db865d1385675f5c4e5a6.tar.gz passt-86b273150a47c6f5783db865d1385675f5c4e5a6.tar.bz2 passt-86b273150a47c6f5783db865d1385675f5c4e5a6.tar.lz passt-86b273150a47c6f5783db865d1385675f5c4e5a6.tar.xz passt-86b273150a47c6f5783db865d1385675f5c4e5a6.tar.zst passt-86b273150a47c6f5783db865d1385675f5c4e5a6.zip |
tcp, udp: Allow binding ports in init namespace to both tap and loopback
Traffic with a loopback source address is forwarded to the direct
loopback connection in the namespace, and the tap interface is used
for all other traffic.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- | passt.c | 15 | ||||
-rw-r--r-- | tcp.c | 55 | ||||
-rw-r--r-- | udp.c | 105 | ||||
-rw-r--r-- | udp.h | 1 | ||||
-rw-r--r-- | util.c | 14 | ||||
-rw-r--r-- | util.h | 10 |
6 files changed, 140 insertions, 60 deletions
@@ -329,15 +329,27 @@ static int get_bound_ports_ns(void *arg) procfs_scan_listen("tcp", c->tcp.port4_to_tap); procfs_scan_listen("tcp", c->udp.port4_to_tap); procfs_scan_listen("udp", c->udp.port4_to_tap); + + procfs_scan_listen("tcp", c->tcp.port4_to_ns); + procfs_scan_listen("tcp", c->udp.port4_to_ns); + procfs_scan_listen("udp", c->udp.port4_to_ns); } if (c->v6) { if (c->v4) { + procfs_scan_listen("tcp6", c->tcp.port4_to_tap); + procfs_scan_listen("tcp6", c->udp.port4_to_tap); + procfs_scan_listen("udp6", c->udp.port4_to_tap); + procfs_scan_listen("tcp6", c->tcp.port4_to_ns); procfs_scan_listen("tcp6", c->udp.port4_to_ns); procfs_scan_listen("udp6", c->udp.port4_to_ns); } + procfs_scan_listen("tcp6", c->tcp.port6_to_tap); + procfs_scan_listen("tcp6", c->udp.port6_to_tap); + procfs_scan_listen("udp6", c->udp.port6_to_tap); + procfs_scan_listen("tcp6", c->tcp.port6_to_ns); procfs_scan_listen("tcp6", c->udp.port6_to_ns); procfs_scan_listen("udp6", c->udp.port6_to_ns); @@ -359,16 +371,19 @@ static void get_bound_ports(struct ctx *c) if (c->v4) { procfs_scan_listen("tcp", c->tcp.port4_to_init); + procfs_scan_listen("tcp", c->udp.port4_to_init); procfs_scan_listen("udp", c->udp.port4_to_init); } if (c->v6) { if (c->v4) { procfs_scan_listen("tcp6", c->tcp.port4_to_init); + procfs_scan_listen("tcp6", c->udp.port4_to_init); procfs_scan_listen("udp6", c->udp.port4_to_init); } procfs_scan_listen("tcp6", c->tcp.port6_to_init); + procfs_scan_listen("tcp6", c->udp.port6_to_init); procfs_scan_listen("udp6", c->udp.port6_to_init); } @@ -2095,7 +2095,8 @@ static int tcp_sock_init_ns(void *arg) continue; tref.index = port; - sock_l4(c, AF_INET, IPPROTO_TCP, port, 1, tref.u32); + sock_l4(c, AF_INET, IPPROTO_TCP, port, BIND_LOOPBACK, + tref.u32); } } @@ -2106,7 +2107,8 @@ static int tcp_sock_init_ns(void *arg) continue; tref.index = port; - sock_l4(c, AF_INET6, IPPROTO_TCP, port, 1, tref.u32); + sock_l4(c, AF_INET6, IPPROTO_TCP, port, BIND_LOOPBACK, + tref.u32); } } @@ -2123,6 
+2125,7 @@ int tcp_sock_init(struct ctx *c) { union tcp_epoll_ref tref = { .listen = 1 }; char ns_fn_stack[NS_FN_STACK_SIZE]; + enum bind_type tap_bind; in_port_t port; getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); @@ -2130,33 +2133,49 @@ int tcp_sock_init(struct ctx *c) if (c->v4) { tref.v6 = 0; for (port = 0; port < USHRT_MAX; port++) { - if (bitmap_isset(c->tcp.port4_to_ns, port)) + tref.index = port; + + if (bitmap_isset(c->tcp.port4_to_ns, port)) { tref.splice = 1; - else if (bitmap_isset(c->tcp.port4_to_tap, port)) - tref.splice = 0; - else - continue; + sock_l4(c, AF_INET, IPPROTO_TCP, port, + BIND_LOOPBACK, tref.u32); + tap_bind = BIND_EXT; + } else { + tap_bind = BIND_ANY; + } - tref.index = port; - sock_l4(c, AF_INET, IPPROTO_TCP, port, tref.splice, - tref.u32); + if (bitmap_isset(c->tcp.port4_to_tap, port)) { + tref.splice = 0; + sock_l4(c, AF_INET, IPPROTO_TCP, port, + tap_bind, tref.u32); + } } + + tcp_sock4_iov_init(); } if (c->v6) { tref.v6 = 1; for (port = 0; port < USHRT_MAX; port++) { - if (bitmap_isset(c->tcp.port6_to_ns, port)) + tref.index = port; + + if (bitmap_isset(c->tcp.port6_to_ns, port)) { tref.splice = 1; - else if (bitmap_isset(c->tcp.port6_to_tap, port)) - tref.splice = 0; - else - continue; + sock_l4(c, AF_INET6, IPPROTO_TCP, port, + BIND_LOOPBACK, tref.u32); + tap_bind = BIND_EXT; + } else { + tap_bind = BIND_ANY; + } - tref.index = port; - sock_l4(c, AF_INET6, IPPROTO_TCP, port, tref.splice, - tref.u32); + if (bitmap_isset(c->tcp.port6_to_tap, port)) { + tref.splice = 0; + sock_l4(c, AF_INET6, IPPROTO_TCP, port, + tap_bind, tref.u32); + } } + + tcp_sock6_iov_init(); } if (c->mode == MODE_PASTA) { @@ -125,11 +125,13 @@ * @sock: Socket bound to source port used as index * @ts: Activity timestamp from tap, used for socket aging * @ts_local: Timestamp of tap packet to gateway address, aging for local bind + * @loopback: Whether local bind should use loopback address as source */ struct udp_tap_port { int 
sock; time_t ts; time_t ts_local; + int loopback; }; /** @@ -201,7 +203,7 @@ udp4_l2_buf[UDP_TAP_FRAMES] = { }; /** - * udp4_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections + * udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections * @s_in6: Source socket address, filled in by recvmmsg() * @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode * @eh: Pre-filled Ethernet header @@ -644,19 +646,25 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, b->ip6h.payload_len = htons(udp6_l2_mh_sock[i].msg_len + sizeof(b->uh)); - if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr) || - !memcmp(&b->s_in6.sin6_addr, &c->addr6_seen, - sizeof(c->addr6))) { + if (IN6_IS_ADDR_LINKLOCAL(&b->s_in6.sin6_addr)) { + b->ip6h.daddr = c->addr6_ll_seen; + b->ip6h.saddr = b->s_in6.sin6_addr; + } else if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr) || + !memcmp(&b->s_in6.sin6_addr, &c->addr6_seen, + sizeof(c->addr6))) { in_port_t src = htons(b->s_in6.sin6_port); - b->ip6h.daddr = c->addr6_seen; + b->ip6h.daddr = c->addr6_ll_seen; b->ip6h.saddr = c->gw6; udp_tap_map[V6][src].ts_local = now->tv_sec; + + if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr)) + udp_tap_map[V6][src].loopback = 1; + else + udp_tap_map[V6][src].loopback = 0; + bitmap_set(udp_act[V6][UDP_ACT_TAP], src); - } else if (IN6_IS_ADDR_LINKLOCAL(&b->s_in6.sin6_addr)) { - b->ip6h.daddr = c->addr6_ll_seen; - b->ip6h.saddr = b->s_in6.sin6_addr; } else { b->ip6h.daddr = c->addr6_seen; b->ip6h.saddr = b->s_in6.sin6_addr; @@ -733,6 +741,12 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, b->iph.saddr = c->gw4; udp_tap_map[V4][src].ts_local = now->tv_sec; + + if (b->s_in.sin_addr.s_addr == c->addr4_seen) + udp_tap_map[V4][src].loopback = 0; + else + udp_tap_map[V4][src].loopback = 1; + bitmap_set(udp_act[V4][UDP_ACT_TAP], src); } else { b->iph.saddr = b->s_in.sin_addr.s_addr; @@ -840,8 +854,12 @@ int udp_tap_handler(struct ctx *c, int af, void 
*addr, udp_tap_map[V4][src].ts = now->tv_sec; if (s_in.sin_addr.s_addr == c->gw4 && - udp_tap_map[V4][dst].ts_local) - s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + udp_tap_map[V4][dst].ts_local) { + if (udp_tap_map[V4][dst].loopback) + s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + else + s_in.sin_addr.s_addr = c->addr4_seen; + } } else { s_in6 = (struct sockaddr_in6) { .sin6_family = AF_INET6, @@ -868,8 +886,12 @@ int udp_tap_handler(struct ctx *c, int af, void *addr, udp_tap_map[V6][src].ts = now->tv_sec; if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) && - udp_tap_map[V6][dst].ts_local) - s_in6.sin6_addr = in6addr_loopback; + udp_tap_map[V6][dst].ts_local) { + if (udp_tap_map[V6][dst].loopback) + s_in6.sin6_addr = in6addr_loopback; + else + s_in6.sin6_addr = c->addr6_seen; + } } for (i = 0; i < count; i++) { @@ -911,7 +933,8 @@ int udp_sock_init_ns(void *arg) continue; uref.port = port; - sock_l4(c, AF_INET, IPPROTO_UDP, port, 1, uref.u32); + sock_l4(c, AF_INET, IPPROTO_UDP, port, BIND_LOOPBACK, + uref.u32); } } @@ -922,7 +945,8 @@ int udp_sock_init_ns(void *arg) continue; uref.port = port; - sock_l4(c, AF_INET6, IPPROTO_UDP, port, 1, uref.u32); + sock_l4(c, AF_INET6, IPPROTO_UDP, port, BIND_LOOPBACK, + uref.u32); } } @@ -992,24 +1016,31 @@ int udp_sock_init(struct ctx *c) { union udp_epoll_ref uref = { .bound = 1 }; char ns_fn_stack[NS_FN_STACK_SIZE]; + enum bind_type tap_bind; in_port_t port; int s; if (c->v4) { uref.v6 = 0; for (port = 0; port < USHRT_MAX; port++) { - if (bitmap_isset(c->udp.port4_to_ns, port)) + uref.port = port; + + if (bitmap_isset(c->udp.port4_to_ns, port)) { uref.splice = UDP_TO_NS; - else if (bitmap_isset(c->udp.port4_to_tap, port)) - uref.splice = 0; - else - continue; + sock_l4(c, AF_INET, IPPROTO_UDP, port, + BIND_LOOPBACK, uref.u32); + tap_bind = BIND_EXT; + } else { + tap_bind = BIND_ANY; + } - uref.port = port; - s = sock_l4(c, AF_INET, IPPROTO_UDP, port, - uref.splice == UDP_TO_NS, uref.u32); - if (!uref.splice && s > 0) - 
udp_tap_map[V4][port].sock = s; + if (bitmap_isset(c->udp.port4_to_tap, port)) { + uref.splice = 0; + s = sock_l4(c, AF_INET, IPPROTO_UDP, port, + tap_bind, uref.u32); + if (s > 0) + udp_tap_map[V4][port].sock = s; + } } udp_sock4_iov_init(); @@ -1018,18 +1049,24 @@ int udp_sock_init(struct ctx *c) if (c->v6) { uref.v6 = 1; for (port = 0; port < USHRT_MAX; port++) { - if (bitmap_isset(c->udp.port6_to_ns, port)) + uref.port = port; + + if (bitmap_isset(c->udp.port6_to_ns, port)) { uref.splice = UDP_TO_NS; - else if (bitmap_isset(c->udp.port6_to_tap, port)) - uref.splice = 0; - else - continue; + sock_l4(c, AF_INET6, IPPROTO_UDP, port, + BIND_LOOPBACK, uref.u32); + tap_bind = BIND_EXT; + } else { + tap_bind = BIND_ANY; + } - uref.port = port; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, port, - uref.splice == UDP_TO_NS, uref.u32); - if (!uref.splice && s > 0) - udp_tap_map[V6][port].sock = s; + if (bitmap_isset(c->udp.port6_to_tap, port)) { + uref.splice = 0; + s = sock_l4(c, AF_INET6, IPPROTO_UDP, port, + tap_bind, uref.u32); + if (s > 0) + udp_tap_map[V6][port].sock = s; + } } udp_sock6_iov_init(); @@ -35,7 +35,6 @@ union udp_epoll_ref { uint32_t u32; }; - /** * struct udp_ctx - Execution context for UDP * @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap @@ -125,13 +125,13 @@ char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto) * @af: Address family, AF_INET or AF_INET6 * @proto: Protocol number * @port: Port, host order - * @lo: Bind to loopback address only, if set + * @bind_type: Type of address for binding * @data: epoll reference portion for protocol handlers * * Return: newly created socket, -1 on error */ -int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, int lo, - uint32_t data) +int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, + enum bind_type bind_addr, uint32_t data) { union epoll_ref ref = { .proto = proto, .data = data }; struct sockaddr_in addr4 = { @@ -161,16 +161,20 @@ int sock_l4(struct ctx *c, int 
af, uint8_t proto, uint16_t port, int lo, ref.s = fd; if (af == AF_INET) { - if (lo) + if (bind_addr == BIND_LOOPBACK) addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + else if (bind_addr == BIND_EXT) + addr4.sin_addr.s_addr = c->addr4; else addr4.sin_addr.s_addr = htonl(INADDR_ANY); sa = (const struct sockaddr *)&addr4; sl = sizeof(addr4); } else { - if (lo) + if (bind_addr == BIND_LOOPBACK) addr6.sin6_addr = in6addr_loopback; + else if (bind_addr == BIND_EXT) + addr6.sin6_addr = c->addr6; else addr6.sin6_addr = in6addr_any; @@ -115,11 +115,17 @@ void debug(const char *format, ...); #include <linux/ip.h> #include <limits.h> +enum bind_type { + BIND_ANY = 0, + BIND_LOOPBACK, + BIND_EXT, +}; + struct ctx; char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto); -int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, int lo, - uint32_t data); +int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, + enum bind_type bind_addr, uint32_t data); int timespec_diff_ms(struct timespec *a, struct timespec *b); void bitmap_set(uint8_t *map, int bit); void bitmap_clear(uint8_t *map, int bit); |