about | git | code | bugs | lists | chat
diff options
context:
space:
mode:
-rw-r--r-- flow.c 53
-rw-r--r-- flow_table.h 2
-rw-r--r-- fwd.c 148
-rw-r--r-- fwd.h 9
-rw-r--r-- tcp.c 103
-rw-r--r-- tcp_splice.c 64
-rw-r--r-- tcp_splice.h 5
7 files changed, 245 insertions, 139 deletions
diff --git a/flow.c b/flow.c
index c4f1236..c1af136 100644
--- a/flow.c
+++ b/flow.c
@@ -400,6 +400,59 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
return tgt;
}
+
+/**
+ * flow_target() - Determine where flow should forward to, and move to TGT
+ * @c:		Execution context
+ * @flow:	Flow to forward
+ * @proto:	Protocol
+ *
+ * The flow must be the entry currently being allocated, still untyped, in INI
+ * state, with the initiating pif set and no target pif yet. The forwarding
+ * helper matching the initiating pif fills in the target side.
+ *
+ * Return: pointer to the target flowside information, NULL if no target
+ *         interface could be determined (state and target pif are then left
+ *         unchanged)
+ */
+const struct flowside *flow_target(const struct ctx *c, union flow *flow,
+				   uint8_t proto)
+{
+	char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
+	struct flow_common *f = &flow->f;
+	const struct flowside *ini = &f->side[INISIDE];
+	struct flowside *tgt = &f->side[TGTSIDE];
+	uint8_t tgtpif = PIF_NONE;
+
+	ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI);
+	ASSERT(f->type == FLOW_TYPE_NONE);
+	ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);
+
+	switch (f->pif[INISIDE]) {
+	case PIF_TAP:
+		tgtpif = fwd_nat_from_tap(c, proto, ini, tgt);
+		break;
+
+	case PIF_SPLICE:
+		tgtpif = fwd_nat_from_splice(c, proto, ini, tgt);
+		break;
+
+	case PIF_HOST:
+		tgtpif = fwd_nat_from_host(c, proto, ini, tgt);
+		break;
+
+	default:
+		/* tgtpif stays PIF_NONE, so NULL is returned below */
+		flow_err(flow, "No rules to forward %s [%s]:%hu -> [%s]:%hu",
+			 pif_name(f->pif[INISIDE]),
+			 inany_ntop(&ini->eaddr, estr, sizeof(estr)),
+			 ini->eport,
+			 inany_ntop(&ini->faddr, fstr, sizeof(fstr)),
+			 ini->fport);
+		break;
+	}
+
+	if (tgtpif == PIF_NONE)
+		return NULL;
+
+	/* Only commit the target pif and state once a target is known */
+	f->pif[TGTSIDE] = tgtpif;
+	flow_set_state(f, FLOW_STATE_TGT);
+	return tgt;
+}
+
/**
* flow_set_type() - Set type and move to TYPED
* @flow: Flow to change state
diff --git a/flow_table.h b/flow_table.h
index aabdbb7..9d912c8 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -138,6 +138,8 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
sa_family_t af,
const void *saddr, in_port_t sport,
const void *daddr, in_port_t dport);
+const struct flowside *flow_target(const struct ctx *c, union flow *flow,
+ uint8_t proto);
union flow *flow_set_type(union flow *flow, enum flow_type type);
#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_)
diff --git a/fwd.c b/fwd.c
index d3f1798..3288b0d 100644
--- a/fwd.c
+++ b/fwd.c
@@ -25,6 +25,7 @@
#include "fwd.h"
#include "passt.h"
#include "lineread.h"
+#include "flow_table.h"
/* See enum in kernel's include/net/tcp_states.h */
#define UDP_LISTEN 0x07
@@ -154,3 +155,150 @@ void fwd_scan_ports_init(struct ctx *c)
&c->tcp.fwd_out, &c->tcp.fwd_in);
}
}
+
+/**
+ * fwd_nat_from_tap() - Work out forwarding for a flow from the tap interface
+ * @c:		Execution context
+ * @proto:	Protocol (IP L4 protocol number), currently unused
+ * @ini:	Flow address information of the initiating side
+ * @tgt:	Flow address information on the target side (updated)
+ *
+ * Return: pif of the target interface to forward the flow to, PIF_NONE if the
+ *         flow cannot or should not be forwarded at all. As written, this
+ *         always selects PIF_HOST.
+ */
+uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
+			 const struct flowside *ini, struct flowside *tgt)
+{
+	(void)proto;
+
+	/* By default, connect to whatever the initiator was addressing */
+	tgt->eport = ini->fport;
+	tgt->eaddr = ini->faddr;
+
+	/* Unless gateway mapping is disabled, traffic addressed to the
+	 * gateway is redirected to the matching loopback address
+	 */
+	if (!c->no_map_gw && inany_equals4(&tgt->eaddr, &c->ip4.gw))
+		tgt->eaddr = inany_loopback4;
+	else if (!c->no_map_gw && inany_equals6(&tgt->eaddr, &c->ip6.gw))
+		tgt->eaddr = inany_loopback6;
+
+	/* Host side source: the addr_out of the matching family, which may be
+	 * unspecified so that the kernel picks an address
+	 */
+	if (!inany_v4(&tgt->eaddr))
+		tgt->faddr.a6 = c->ip6.addr_out;
+	else
+		tgt->faddr = inany_from_v4(c->ip4.addr_out);
+
+	/* Port 0: the kernel picks the host side source port too */
+	tgt->fport = 0;
+
+	return PIF_HOST;
+}
+
+/**
+ * fwd_nat_from_splice() - Work out forwarding for a flow from the splice pif
+ * @c:		Execution context
+ * @proto:	Protocol (IP L4 protocol number)
+ * @ini:	Flow address information of the initiating side
+ * @tgt:	Flow address information on the target side (updated)
+ *
+ * Return: pif of the target interface to forward the flow to, PIF_NONE if the
+ *         flow cannot or should not be forwarded at all. @tgt is not touched
+ *         when PIF_NONE is returned.
+ */
+uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
+			    const struct flowside *ini, struct flowside *tgt)
+{
+	/* The peer must be on loopback, and the address it connected to must
+	 * be loopback or unspecified
+	 */
+	if (!inany_is_loopback(&ini->eaddr) ||
+	    (!inany_is_loopback(&ini->faddr) &&
+	     !inany_is_unspecified(&ini->faddr))) {
+		char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
+
+		debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu",
+		      pif_name(PIF_SPLICE),
+		      inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport,
+		      inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport);
+		return PIF_NONE;
+	}
+
+	/* Connect to loopback of the same family as the initiator */
+	if (inany_v4(&ini->eaddr))
+		tgt->eaddr = inany_loopback4;
+	else
+		tgt->eaddr = inany_loopback6;
+
+	/* Preserve the specific loopback address used, but let the kernel pick
+	 * a source port on the target side
+	 */
+	tgt->faddr = ini->eaddr;
+	tgt->fport = 0;
+
+	/* Same destination port, adjusted by the fwd_out delta for TCP */
+	tgt->eport = ini->fport;
+	if (proto == IPPROTO_TCP)
+		tgt->eport += c->tcp.fwd_out.delta[tgt->eport];
+
+	return PIF_HOST;
+}
+
+/**
+ * fwd_nat_from_host() - Work out forwarding for a flow from the host interface
+ * @c:		Execution context
+ * @proto:	Protocol (IP L4 protocol number)
+ * @ini:	Flow address information of the initiating side
+ * @tgt:	Flow address information on the target side (updated)
+ *
+ * Return: pif of the target interface to forward the flow to, PIF_NONE if the
+ *         flow cannot or should not be forwarded at all. As written, this
+ *         selects PIF_SPLICE for TCP from loopback in pasta mode, and PIF_TAP
+ *         otherwise.
+ */
+uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
+			  const struct flowside *ini, struct flowside *tgt)
+{
+	const int is_tcp = (proto == IPPROTO_TCP);
+
+	/* Shared by the spliced and non-spliced paths: destination port (with
+	 * the fwd_in delta for TCP), and the initiator's address as our
+	 * starting point for the target side source address
+	 */
+	tgt->eport = ini->fport;
+	if (is_tcp)
+		tgt->eport += c->tcp.fwd_in.delta[tgt->eport];
+	tgt->faddr = ini->eaddr;
+
+	if (is_tcp && c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr)) {
+		/* Spliceable: keep the specific loopback address used, but
+		 * let the kernel pick a source port on the target side
+		 */
+		tgt->fport = 0;
+		tgt->eaddr = inany_v4(&ini->eaddr) ? inany_loopback4
+						   : inany_loopback6;
+		return PIF_SPLICE;
+	}
+
+	/* Not spliced: keep the initiator's source port */
+	tgt->fport = ini->eport;
+
+	/* Translate source addresses matching the checks below to a gateway
+	 * (or, for IPv6, possibly our link-local) address
+	 */
+	if (inany_is_loopback4(&tgt->faddr) ||
+	    inany_is_unspecified4(&tgt->faddr) ||
+	    inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) {
+		tgt->faddr = inany_from_v4(c->ip4.gw);
+	} else if (inany_is_loopback6(&tgt->faddr) ||
+		   inany_equals6(&tgt->faddr, &c->ip6.addr_seen) ||
+		   inany_equals6(&tgt->faddr, &c->ip6.addr)) {
+		if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
+			tgt->faddr.a6 = c->ip6.gw;
+		else
+			tgt->faddr.a6 = c->ip6.addr_ll;
+	}
+
+	/* Destination: the observed address matching the family, and for
+	 * IPv6 the scope, of the (possibly translated) source
+	 */
+	if (inany_v4(&tgt->faddr))
+		tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
+	else if (inany_is_linklocal6(&tgt->faddr))
+		tgt->eaddr.a6 = c->ip6.addr_ll_seen;
+	else
+		tgt->eaddr.a6 = c->ip6.addr_seen;
+
+	return PIF_TAP;
+}
diff --git a/fwd.h b/fwd.h
index 41645d7..b4aa8d5 100644
--- a/fwd.h
+++ b/fwd.h
@@ -7,6 +7,8 @@
#ifndef FWD_H
#define FWD_H
+struct flowside;
+
/* Number of ports for both TCP and UDP */
#define NUM_PORTS (1U << 16)
@@ -42,4 +44,11 @@ void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev,
const struct fwd_ports *tcp_rev);
void fwd_scan_ports_init(struct ctx *c);
+uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
+ const struct flowside *ini, struct flowside *tgt);
+uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
+ const struct flowside *ini, struct flowside *tgt);
+uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
+ const struct flowside *ini, struct flowside *tgt);
+
#endif /* FWD_H */
diff --git a/tcp.c b/tcp.c
index b6eca5d..0c66ac8 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1470,7 +1470,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
{
in_port_t srcport = ntohs(th->source);
in_port_t dstport = ntohs(th->dest);
- union inany_addr srcaddr, dstaddr; /* FIXME: Avoid bulky temporaries */
const struct flowside *ini, *tgt;
struct tcp_tap_conn *conn;
union sockaddr_inany sa;
@@ -1485,34 +1484,16 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
ini = flow_initiate_af(flow, PIF_TAP,
af, saddr, srcport, daddr, dstport);
- dstaddr = ini->faddr;
- if (!c->no_map_gw) {
- if (inany_equals4(&dstaddr, &c->ip4.gw))
- dstaddr = inany_loopback4;
- else if (inany_equals6(&dstaddr, &c->ip6.gw))
- dstaddr = inany_loopback6;
-
- }
+ if (!(tgt = flow_target(c, flow, IPPROTO_TCP)))
+ goto cancel;
- if (inany_is_linklocal6(&dstaddr)) {
- srcaddr.a6 = c->ip6.addr_ll;
- } else if (inany_is_loopback(&dstaddr)) {
- srcaddr = dstaddr;
- } else if (inany_v4(&dstaddr)) {
- if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out))
- srcaddr = inany_from_v4(c->ip4.addr_out);
- else
- srcaddr = inany_any4;
- } else {
- if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out))
- srcaddr.a6 = c->ip6.addr_out;
- else
- srcaddr = inany_any6;
+ if (flow->f.pif[TGTSIDE] != PIF_HOST) {
+ flow_err(flow, "No support for forwarding TCP from %s to %s",
+ pif_name(flow->f.pif[INISIDE]),
+ pif_name(flow->f.pif[TGTSIDE]));
+ goto cancel;
}
- tgt = flow_target_af(flow, PIF_HOST, AF_INET6,
- &srcaddr, 0, /* Kernel decides source port */
- &dstaddr, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0 ||
@@ -2061,62 +2042,19 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
}
/**
- * tcp_snat_inbound() - Translate source address for inbound data if needed
- * @c: Execution context
- * @addr: Source address of inbound packet/connection
- */
-static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
-{
- if (inany_is_loopback4(addr) ||
- inany_is_unspecified4(addr) ||
- inany_equals4(addr, &c->ip4.addr_seen)) {
- *addr = inany_from_v4(c->ip4.gw);
- } else if (inany_is_loopback6(addr) ||
- inany_equals6(addr, &c->ip6.addr_seen) ||
- inany_equals6(addr, &c->ip6.addr)) {
- if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
- addr->a6 = c->ip6.gw;
- else
- addr->a6 = c->ip6.addr_ll;
- }
-}
-
-/**
* tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
* @c: Execution context
- * @dstport: Destination port for connection (host side)
* @flow: flow to initialise
* @s: Accepted socket
- * @sa: Peer socket address (from accept())
* @now: Current timestamp
*/
-static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
- union flow *flow, int s,
- const union sockaddr_inany *sa,
+static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
const struct timespec *now)
{
- union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */
- struct tcp_tap_conn *conn;
- in_port_t srcport;
+ struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
uint64_t hash;
- inany_from_sockaddr(&saddr, &srcport, sa);
- tcp_snat_inbound(c, &saddr);
-
- if (inany_v4(&saddr)) {
- daddr = inany_from_v4(c->ip4.addr_seen);
- } else {
- if (inany_is_linklocal6(&saddr))
- daddr.a6 = c->ip6.addr_ll_seen;
- else
- daddr.a6 = c->ip6.addr_seen;
- }
- dstport += c->tcp.fwd_in.delta[dstport];
-
- flow_target_af(flow, PIF_TAP, AF_INET6,
- &saddr, srcport, &daddr, dstport);
- conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
-
conn->sock = s;
conn->timer = -1;
conn->ws_to_tap = conn->ws_from_tap = 0;
@@ -2174,11 +2112,26 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
goto cancel;
}
- if (tcp_splice_conn_from_sock(c, ref.tcp_listen.pif,
- ref.tcp_listen.port, flow, s, &sa))
- return;
+ if (!flow_target(c, flow, IPPROTO_TCP))
+ goto cancel;
+
+ switch (flow->f.pif[TGTSIDE]) {
+ case PIF_SPLICE:
+ case PIF_HOST:
+ tcp_splice_conn_from_sock(c, flow, s);
+ break;
+
+ case PIF_TAP:
+ tcp_tap_conn_from_sock(c, flow, s, now);
+ break;
+
+ default:
+ flow_err(flow, "No support for forwarding TCP from %s to %s",
+ pif_name(flow->f.pif[INISIDE]),
+ pif_name(flow->f.pif[TGTSIDE]));
+ goto cancel;
+ }
- tcp_tap_conn_from_sock(c, ref.tcp_listen.port, flow, s, &sa, now);
return;
cancel:
diff --git a/tcp_splice.c b/tcp_splice.c
index c81daee..473562b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -414,72 +414,18 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af)
/**
* tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
* @c: Execution context
- * @pif0: pif id of side 0
- * @dstport: Side 0 destination port of connection
* @flow: flow to initialise
* @s0: Accepted (side 0) socket
- * @sa: Peer address of connection
*
- * Return: true if able to create a spliced connection, false otherwise
* #syscalls:pasta setsockopt
*/
-bool tcp_splice_conn_from_sock(const struct ctx *c,
- uint8_t pif0, in_port_t dstport,
- union flow *flow, int s0,
- const union sockaddr_inany *sa)
+void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
{
- struct tcp_splice_conn *conn;
- union inany_addr src;
- in_port_t srcport;
- sa_family_t af;
- uint8_t tgtpif;
+ struct tcp_splice_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE,
+ tcp_splice);
- if (c->mode != MODE_PASTA)
- return false;
-
- inany_from_sockaddr(&src, &srcport, sa);
- af = inany_v4(&src) ? AF_INET : AF_INET6;
-
- switch (pif0) {
- case PIF_SPLICE:
- if (!inany_is_loopback(&src)) {
- char str[INANY_ADDRSTRLEN];
-
- /* We can't use flow_err() etc. because we haven't set
- * the flow type yet
- */
- warn("Bad source address %s for splice, closing",
- inany_ntop(&src, str, sizeof(str)));
-
- /* We *don't* want to fall back to tap */
- flow_alloc_cancel(flow);
- return true;
- }
-
- tgtpif = PIF_HOST;
- dstport += c->tcp.fwd_out.delta[dstport];
- break;
-
- case PIF_HOST:
- if (!inany_is_loopback(&src))
- return false;
-
- tgtpif = PIF_SPLICE;
- dstport += c->tcp.fwd_in.delta[dstport];
- break;
-
- default:
- return false;
- }
-
- /* FIXME: Record outbound source address when known */
- if (af == AF_INET)
- flow_target_af(flow, tgtpif, AF_INET,
- NULL, 0, &in4addr_loopback, dstport);
- else
- flow_target_af(flow, tgtpif, AF_INET6,
- NULL, 0, &in6addr_loopback, dstport);
- conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
+ ASSERT(c->mode == MODE_PASTA);
conn->s[0] = s0;
conn->s[1] = -1;
@@ -493,8 +439,6 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
conn_flag(c, conn, CLOSING);
FLOW_ACTIVATE(conn);
-
- return true;
}
/**
diff --git a/tcp_splice.h b/tcp_splice.h
index ed8f0c5..a20f3e2 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -11,10 +11,7 @@ union sockaddr_inany;
void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
uint32_t events);
-bool tcp_splice_conn_from_sock(const struct ctx *c,
- uint8_t pif0, in_port_t dstport,
- union flow *flow, int s0,
- const union sockaddr_inany *sa);
+void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0);
void tcp_splice_init(struct ctx *c);
#endif /* TCP_SPLICE_H */