aboutgitcodebugslistschat
path: root/tcp.c
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2021-09-27 05:24:30 +0200
committerStefano Brivio <sbrivio@redhat.com>2021-09-27 11:23:44 +0200
commit9657b6ed05cc67273f6bab1751ae98ca4e89f114 (patch)
tree556c1dc6e035322d1c9525703863175b9e86d542 /tcp.c
parente69e13671dcbf3b6964e7bd9d485f267c5fa03cb (diff)
downloadpasst-9657b6ed05cc67273f6bab1751ae98ca4e89f114.tar
passt-9657b6ed05cc67273f6bab1751ae98ca4e89f114.tar.gz
passt-9657b6ed05cc67273f6bab1751ae98ca4e89f114.tar.bz2
passt-9657b6ed05cc67273f6bab1751ae98ca4e89f114.tar.lz
passt-9657b6ed05cc67273f6bab1751ae98ca4e89f114.tar.xz
passt-9657b6ed05cc67273f6bab1751ae98ca4e89f114.tar.zst
passt-9657b6ed05cc67273f6bab1751ae98ca4e89f114.zip
conf, tcp: Periodic detection of bound ports for pasta port forwarding
Detecting bound ports at start-up time isn't terribly useful: do this periodically instead, if configured. This is only implemented for TCP at the moment, UDP is somewhat more complicated: leave a TODO there. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'tcp.c')
-rw-r--r--tcp.c292
1 files changed, 239 insertions, 53 deletions
diff --git a/tcp.c b/tcp.c
index 830bf4a..7de8fa9 100644
--- a/tcp.c
+++ b/tcp.c
@@ -334,6 +334,7 @@
#include "tap.h"
#include "siphash.h"
#include "pcap.h"
+#include "conf.h"
#define MAX_TAP_CONNS (128 * 1024)
#define MAX_SPLICE_CONNS (128 * 1024)
@@ -363,6 +364,8 @@
#define TCP_SPLICE_PIPE_POOL_SIZE 256
#define REFILL_INTERVAL 1000
+#define PORT_DETECT_INTERVAL 1000
+
/* We need to include <linux/tcp.h> for tcpi_bytes_acked, instead of
* <netinet/tcp.h>, but that doesn't include a definition for SOL_TCP
*/
@@ -525,6 +528,11 @@ struct tcp_splice_conn {
static in_port_t tcp_port_delta_to_tap [USHRT_MAX];
static in_port_t tcp_port_delta_to_init [USHRT_MAX];
+/* Listening sockets, used for automatic port forwarding in pasta mode only */
+static int tcp_sock_init_lo [USHRT_MAX][IP_VERSIONS];
+static int tcp_sock_init_ext [USHRT_MAX][IP_VERSIONS];
+static int tcp_sock_ns [USHRT_MAX][IP_VERSIONS];
+
/**
* tcp_remap_to_tap() - Set delta for port translation toward guest/tap
* @port: Original destination port, host order
@@ -3002,6 +3010,93 @@ smaller:
}
/**
+ * tcp_sock_init_one() - Initialise listening sockets for a given port
+ * @c: Execution context
+ * @ns: In pasta mode, if set, bind with loopback address in namespace
+ * @port: Port, host order
+ */
+static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
+{
+ union tcp_epoll_ref tref = { .listen = 1 };
+ int s;
+
+ if (ns)
+ tref.index = (in_port_t)(port + tcp_port_delta_to_init[port]);
+ else
+ tref.index = (in_port_t)(port + tcp_port_delta_to_tap[port]);
+
+ if (c->v4) {
+ tref.v6 = 0;
+
+ tref.splice = 0;
+ if (!ns) {
+ s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
+ c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
+ tref.u32);
+ if (s > 0)
+ tcp_sock_set_bufsize(s);
+ else
+ s = -1;
+
+ if (c->tcp.init_detect_ports)
+ tcp_sock_init_ext[port][V4] = s;
+ }
+
+ if (c->mode == MODE_PASTA) {
+ tref.splice = 1;
+ s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
+ BIND_LOOPBACK, tref.u32);
+ if (s > 0)
+ tcp_sock_set_bufsize(s);
+ else
+ s = -1;
+
+ if (c->tcp.ns_detect_ports) {
+ if (ns)
+ tcp_sock_ns[port][V4] = s;
+ else
+ tcp_sock_init_lo[port][V4] = s;
+ }
+ }
+ }
+
+ if (c->v6) {
+ tref.v6 = 1;
+
+ tref.splice = 0;
+ if (!ns) {
+ s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
+ c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
+ tref.u32);
+ if (s > 0)
+ tcp_sock_set_bufsize(s);
+ else
+ s = -1;
+
+ if (c->tcp.init_detect_ports)
+ tcp_sock_init_ext[port][V6] = s;
+ }
+
+ if (c->mode == MODE_PASTA) {
+ tref.splice = 1;
+ s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
+ BIND_LOOPBACK, tref.u32);
+ if (s > 0)
+ tcp_sock_set_bufsize(s);
+ else
+ s = -1;
+
+ if (c->tcp.ns_detect_ports) {
+ if (ns)
+ tcp_sock_ns[port][V6] = s;
+ else
+ tcp_sock_init_lo[port][V6] = s;
+ }
+ }
+ }
+}
+
+/**
* tcp_sock_init_ns() - Bind sockets in namespace for inbound connections
* @arg: Execution context
*
@@ -3009,10 +3104,8 @@ smaller:
*/
static int tcp_sock_init_ns(void *arg)
{
- union tcp_epoll_ref tref = { .listen = 1, .splice = 1 };
struct ctx *c = (struct ctx *)arg;
in_port_t port;
- int s;
ns_enter(c->pasta_pid);
@@ -3020,21 +3113,7 @@ static int tcp_sock_init_ns(void *arg)
if (!bitmap_isset(c->tcp.port_to_init, port))
continue;
- tref.index = (in_port_t)(port + tcp_port_delta_to_init[port]);
-
- if (c->v4) {
- tref.v6 = 0;
- s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
- BIND_LOOPBACK, tref.u32);
- tcp_sock_set_bufsize(s);
- }
-
- if (c->v6) {
- tref.v6 = 1;
- s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
- BIND_LOOPBACK, tref.u32);
- tcp_sock_set_bufsize(s);
- }
+ tcp_sock_init_one(c, 1, port);
}
return 0;
@@ -3128,9 +3207,7 @@ static int tcp_sock_refill(void *arg)
int tcp_sock_init(struct ctx *c, struct timespec *now)
{
struct tcp_sock_refill_arg refill_arg = { c, 0 };
- union tcp_epoll_ref tref = { .listen = 1 };
in_port_t port;
- int s;
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
@@ -3138,40 +3215,7 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
if (!bitmap_isset(c->tcp.port_to_tap, port))
continue;
- tref.index = (in_port_t)(port + tcp_port_delta_to_tap[port]);
- if (c->v4) {
- tref.v6 = 0;
-
- tref.splice = 0;
- s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
- c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
- tref.u32);
- tcp_sock_set_bufsize(s);
-
- if (c->mode == MODE_PASTA) {
- tref.splice = 1;
- s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
- BIND_LOOPBACK, tref.u32);
- tcp_sock_set_bufsize(s);
- }
- }
-
- if (c->v6) {
- tref.v6 = 1;
-
- tref.splice = 0;
- s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
- c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
- tref.u32);
- tcp_sock_set_bufsize(s);
-
- if (c->mode == MODE_PASTA) {
- tref.splice = 1;
- s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
- BIND_LOOPBACK, tref.u32);
- tcp_sock_set_bufsize(s);
- }
- }
+ tcp_sock_init_one(c, 0, port);
}
if (c->v4)
@@ -3190,6 +3234,8 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
refill_arg.ns = 1;
NS_CALL(tcp_sock_refill, &refill_arg);
tcp_splice_pipe_refill(c);
+
+ c->tcp.port_detect_ts = *now;
}
return 0;
@@ -3284,6 +3330,122 @@ static void tcp_timer_one(struct ctx *c, struct tcp_tap_conn *conn,
}
/**
+ * struct tcp_port_detect_arg - Arguments for tcp_port_detect()
+ * @c: Execution context
+ * @detect_in_ns: Detect ports bound in namespace, not in init
+ */
+struct tcp_port_detect_arg {
+ struct ctx *c;
+ int detect_in_ns;
+};
+
+/**
+ * tcp_port_detect() - Detect ports bound in namespace or init
+ * @arg: See struct tcp_port_detect_arg
+ *
+ * Return: 0
+ */
+static int tcp_port_detect(void *arg)
+{
+ struct tcp_port_detect_arg *a = (struct tcp_port_detect_arg *)arg;
+
+ if (a->detect_in_ns) {
+ ns_enter(a->c->pasta_pid);
+
+ get_bound_ports(a->c, 1, IPPROTO_TCP);
+ } else {
+ get_bound_ports(a->c, 0, IPPROTO_TCP);
+ }
+
+ return 0;
+}
+
+/**
+ * struct tcp_port_rebind_arg - Arguments for tcp_port_rebind()
+ * @c: Execution context
+ * @bind_in_ns: Rebind ports in namespace, not in init
+ */
+struct tcp_port_rebind_arg {
+ struct ctx *c;
+ int bind_in_ns;
+};
+
+/**
+ * tcp_port_rebind() - Rebind ports in namespace or init
+ * @arg: See struct tcp_port_rebind_arg
+ *
+ * Return: 0
+ */
+static int tcp_port_rebind(void *arg)
+{
+ struct tcp_port_rebind_arg *a = (struct tcp_port_rebind_arg *)arg;
+ in_port_t port;
+
+ if (a->bind_in_ns) {
+ ns_enter(a->c->pasta_pid);
+
+ for (port = 0; port < USHRT_MAX; port++) {
+ if (!bitmap_isset(a->c->tcp.port_to_init, port)) {
+ if (tcp_sock_ns[port][V4] > 0) {
+ close(tcp_sock_ns[port][V4]);
+ tcp_sock_ns[port][V4] = 0;
+ }
+
+ if (tcp_sock_ns[port][V6] > 0) {
+ close(tcp_sock_ns[port][V6]);
+ tcp_sock_ns[port][V6] = 0;
+ }
+
+ continue;
+ }
+
+ /* Don't loop back our own ports */
+ if (bitmap_isset(a->c->tcp.port_to_tap, port))
+ continue;
+
+ if ((a->c->v4 && !tcp_sock_ns[port][V4]) ||
+ (a->c->v6 && !tcp_sock_ns[port][V6]))
+ tcp_sock_init_one(a->c, 1, port);
+ }
+ } else {
+ for (port = 0; port < USHRT_MAX; port++) {
+ if (!bitmap_isset(a->c->tcp.port_to_tap, port)) {
+ if (tcp_sock_init_ext[port][V4] > 0) {
+ close(tcp_sock_init_ext[port][V4]);
+ tcp_sock_init_ext[port][V4] = 0;
+ }
+
+ if (tcp_sock_init_ext[port][V6] > 0) {
+ close(tcp_sock_init_ext[port][V6]);
+ tcp_sock_init_ext[port][V6] = 0;
+ }
+
+ if (tcp_sock_init_lo[port][V4] > 0) {
+ close(tcp_sock_init_lo[port][V4]);
+ tcp_sock_init_lo[port][V4] = 0;
+ }
+
+ if (tcp_sock_init_lo[port][V6] > 0) {
+ close(tcp_sock_init_lo[port][V6]);
+ tcp_sock_init_lo[port][V6] = 0;
+ }
+ continue;
+ }
+
+ /* Don't loop back our own ports */
+ if (bitmap_isset(a->c->tcp.port_to_init, port))
+ continue;
+
+ if ((a->c->v4 && !tcp_sock_init_ext[port][V4]) ||
+ (a->c->v6 && !tcp_sock_init_ext[port][V6]))
+ tcp_sock_init_one(a->c, 0, port);
+ }
+ }
+
+ return 0;
+}
+
+/**
* tcp_timer() - Scan activity bitmap for sockets waiting for timed events
* @c: Execution context
* @ts: Timestamp from caller
@@ -3293,6 +3455,30 @@ void tcp_timer(struct ctx *c, struct timespec *now)
struct tcp_sock_refill_arg refill_arg = { c, 0 };
int i;
+ if (c->mode == MODE_PASTA) {
+ if (timespec_diff_ms(now, &c->tcp.port_detect_ts) >
+ PORT_DETECT_INTERVAL) {
+ struct tcp_port_detect_arg detect_arg = { c, 0 };
+ struct tcp_port_rebind_arg rebind_arg = { c, 0 };
+
+ if (c->tcp.init_detect_ports) {
+ detect_arg.detect_in_ns = 0;
+ tcp_port_detect(&detect_arg);
+ rebind_arg.bind_in_ns = 1;
+ NS_CALL(tcp_port_rebind, &rebind_arg);
+ }
+
+ if (c->tcp.ns_detect_ports) {
+ detect_arg.detect_in_ns = 1;
+ NS_CALL(tcp_port_detect, &detect_arg);
+ rebind_arg.bind_in_ns = 0;
+ tcp_port_rebind(&rebind_arg);
+ }
+
+ c->tcp.port_detect_ts = *now;
+ }
+ }
+
if (timespec_diff_ms(now, &c->tcp.refill_ts) > REFILL_INTERVAL) {
tcp_sock_refill(&refill_arg);
if (c->mode == MODE_PASTA) {