aboutgitcodebugslistschat
path: root/fwd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fwd.c')
-rw-r--r--fwd.c275
1 files changed, 229 insertions, 46 deletions
diff --git a/fwd.c b/fwd.c
index dea36f6..2829cd2 100644
--- a/fwd.c
+++ b/fwd.c
@@ -27,6 +27,80 @@
#include "lineread.h"
#include "flow_table.h"
+/* Empheral port range: values from RFC 6335 */
+static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14);
+static in_port_t fwd_ephemeral_max = NUM_PORTS - 1;
+
+#define PORT_RANGE_SYSCTL "/proc/sys/net/ipv4/ip_local_port_range"
+
+/** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral
+ *
+ * Work out what ports the host thinks are emphemeral and record it for later
+ * use by fwd_port_is_ephemeral(). If we're unable to probe, assume the range
+ * recommended by RFC 6335.
+ */
+void fwd_probe_ephemeral(void)
+{
+ char *line, *tab, *end;
+ struct lineread lr;
+ long min, max;
+ ssize_t len;
+ int fd;
+
+ fd = open(PORT_RANGE_SYSCTL, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ warn_perror("Unable to open %s", PORT_RANGE_SYSCTL);
+ return;
+ }
+
+ lineread_init(&lr, fd);
+ len = lineread_get(&lr, &line);
+ close(fd);
+
+ if (len < 0)
+ goto parse_err;
+
+ tab = strchr(line, '\t');
+ if (!tab)
+ goto parse_err;
+ *tab = '\0';
+
+ errno = 0;
+ min = strtol(line, &end, 10);
+ if (*end || errno)
+ goto parse_err;
+
+ errno = 0;
+ max = strtol(tab + 1, &end, 10);
+ if (*end || errno)
+ goto parse_err;
+
+ if (min < 0 || min >= (long)NUM_PORTS ||
+ max < 0 || max >= (long)NUM_PORTS)
+ goto parse_err;
+
+ fwd_ephemeral_min = min;
+ fwd_ephemeral_max = max;
+
+ return;
+
+parse_err:
+ warn("Unable to parse %s", PORT_RANGE_SYSCTL);
+}
+
+/**
+ * fwd_port_is_ephemeral() - Is port number ephemeral?
+ * @port: Port number
+ *
+ * Return: true if @port is ephemeral, that is may be allocated by the kernel as
+ * a local port for outgoing connections or datagrams, but should not be
+ * used for binding services to.
+ */
+bool fwd_port_is_ephemeral(in_port_t port)
+{
+ return (port >= fwd_ephemeral_min) && (port <= fwd_ephemeral_max);
+}
+
/* See enum in kernel's include/net/tcp_states.h */
#define UDP_LISTEN 0x07
#define TCP_LISTEN 0x0a
@@ -167,7 +241,86 @@ void fwd_scan_ports_init(struct ctx *c)
static bool is_dns_flow(uint8_t proto, const struct flowside *ini)
{
return ((proto == IPPROTO_UDP) || (proto == IPPROTO_TCP)) &&
- ((ini->fport == 53) || (ini->fport == 853));
+ ((ini->oport == 53) || (ini->oport == 853));
+}
+
+/**
+ * fwd_guest_accessible4() - Is IPv4 address guest-accessible
+ * @c: Execution context
+ * @addr: Host visible IPv4 address
+ *
+ * Return: true if @addr on the host is accessible to the guest without
+ * translation, false otherwise
+ */
+static bool fwd_guest_accessible4(const struct ctx *c,
+ const struct in_addr *addr)
+{
+ if (IN4_IS_ADDR_LOOPBACK(addr))
+ return false;
+
+ /* In socket interfaces 0.0.0.0 generally means "any" or unspecified,
+ * however on the wire it can mean "this host on this network". Since
+ * that has a different meaning for host and guest, we can't let it
+ * through untranslated.
+ */
+ if (IN4_IS_ADDR_UNSPECIFIED(addr))
+ return false;
+
+ /* For IPv4, addr_seen is initialised to addr, so is always a valid
+ * address
+ */
+ if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr) ||
+ IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
+ return false;
+
+ return true;
+}
+
+/**
+ * fwd_guest_accessible6() - Is IPv6 address guest-accessible
+ * @c: Execution context
+ * @addr: Host visible IPv6 address
+ *
+ * Return: true if @addr on the host is accessible to the guest without
+ * translation, false otherwise
+ */
+static bool fwd_guest_accessible6(const struct ctx *c,
+ const struct in6_addr *addr)
+{
+ if (IN6_IS_ADDR_LOOPBACK(addr))
+ return false;
+
+ if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr))
+ return false;
+
+ /* For IPv6, addr_seen starts unspecified, because we don't know what LL
+ * address the guest will take until we see it. Only check against it
+ * if it has been set to a real address.
+ */
+ if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_seen) &&
+ IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr_seen))
+ return false;
+
+ return true;
+}
+
+/**
+ * fwd_guest_accessible() - Is IPv[46] address guest-accessible
+ * @c: Execution context
+ * @addr: Host visible IPv[46] address
+ *
+ * Return: true if @addr on the host is accessible to the guest without
+ * translation, false otherwise
+ */
+static bool fwd_guest_accessible(const struct ctx *c,
+ const union inany_addr *addr)
+{
+ const struct in_addr *a4 = inany_v4(addr);
+
+ if (a4)
+ return fwd_guest_accessible4(c, a4);
+
+ return fwd_guest_accessible6(c, &addr->a6);
}
/**
@@ -184,33 +337,37 @@ uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt)
{
if (is_dns_flow(proto, ini) &&
- inany_equals4(&ini->faddr, &c->ip4.dns_match))
+ inany_equals4(&ini->oaddr, &c->ip4.dns_match))
tgt->eaddr = inany_from_v4(c->ip4.dns_host);
else if (is_dns_flow(proto, ini) &&
- inany_equals6(&ini->faddr, &c->ip6.dns_match))
+ inany_equals6(&ini->oaddr, &c->ip6.dns_match))
tgt->eaddr.a6 = c->ip6.dns_host;
- else if (!c->no_map_gw && inany_equals4(&ini->faddr, &c->ip4.gw))
+ else if (inany_equals4(&ini->oaddr, &c->ip4.map_host_loopback))
tgt->eaddr = inany_loopback4;
- else if (!c->no_map_gw && inany_equals6(&ini->faddr, &c->ip6.gw))
+ else if (inany_equals6(&ini->oaddr, &c->ip6.map_host_loopback))
tgt->eaddr = inany_loopback6;
+ else if (inany_equals4(&ini->oaddr, &c->ip4.map_guest_addr))
+ tgt->eaddr = inany_from_v4(c->ip4.addr);
+ else if (inany_equals6(&ini->oaddr, &c->ip6.map_guest_addr))
+ tgt->eaddr.a6 = c->ip6.addr;
else
- tgt->eaddr = ini->faddr;
+ tgt->eaddr = ini->oaddr;
- tgt->eport = ini->fport;
+ tgt->eport = ini->oport;
/* The relevant addr_out controls the host side source address. This
* may be unspecified, which allows the kernel to pick an address.
*/
if (inany_v4(&tgt->eaddr))
- tgt->faddr = inany_from_v4(c->ip4.addr_out);
+ tgt->oaddr = inany_from_v4(c->ip4.addr_out);
else
- tgt->faddr.a6 = c->ip6.addr_out;
+ tgt->oaddr.a6 = c->ip6.addr_out;
/* Let the kernel pick a host side source port */
- tgt->fport = 0;
+ tgt->oport = 0;
if (proto == IPPROTO_UDP) {
/* But for UDP we preserve the source port */
- tgt->fport = ini->eport;
+ tgt->oport = ini->eport;
}
return PIF_HOST;
@@ -230,13 +387,13 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt)
{
if (!inany_is_loopback(&ini->eaddr) ||
- (!inany_is_loopback(&ini->faddr) && !inany_is_unspecified(&ini->faddr))) {
+ (!inany_is_loopback(&ini->oaddr) && !inany_is_unspecified(&ini->oaddr))) {
char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu",
pif_name(PIF_SPLICE),
inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport,
- inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport);
+ inany_ntop(&ini->oaddr, fstr, sizeof(fstr)), ini->oport);
return PIF_NONE;
}
@@ -248,20 +405,20 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
/* Preserve the specific loopback adddress used, but let the kernel pick
* a source port on the target side
*/
- tgt->faddr = ini->eaddr;
- tgt->fport = 0;
+ tgt->oaddr = ini->eaddr;
+ tgt->oport = 0;
- tgt->eport = ini->fport;
+ tgt->eport = ini->oport;
if (proto == IPPROTO_TCP)
tgt->eport += c->tcp.fwd_out.delta[tgt->eport];
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_out.delta[tgt->eport];
/* Let the kernel pick a host side source port */
- tgt->fport = 0;
+ tgt->oport = 0;
if (proto == IPPROTO_UDP)
/* But for UDP preserve the source port */
- tgt->fport = ini->eport;
+ tgt->oport = ini->eport;
return PIF_HOST;
}
@@ -280,53 +437,79 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt)
{
/* Common for spliced and non-spliced cases */
- tgt->eport = ini->fport;
+ tgt->eport = ini->oport;
if (proto == IPPROTO_TCP)
tgt->eport += c->tcp.fwd_in.delta[tgt->eport];
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_in.delta[tgt->eport];
- if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) &&
+ if (!c->no_splice && inany_is_loopback(&ini->eaddr) &&
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
/* spliceable */
- /* Preserve the specific loopback adddress used, but let the
- * kernel pick a source port on the target side
+ /* The traffic will go over the guest's 'lo' interface, but by
+ * default use its external address, so we don't inadvertently
+ * expose services that listen only on the guest's loopback
+ * address. That can be overridden by --host-lo-to-ns-lo which
+ * will instead forward to the loopback address in the guest.
+ *
+ * In either case, let the kernel pick the source address to
+ * match.
*/
- tgt->faddr = ini->eaddr;
- tgt->fport = 0;
+ if (inany_v4(&ini->eaddr)) {
+ if (c->host_lo_to_ns_lo)
+ tgt->eaddr = inany_loopback4;
+ else
+ tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
+ tgt->oaddr = inany_any4;
+ } else {
+ if (c->host_lo_to_ns_lo)
+ tgt->eaddr = inany_loopback6;
+ else
+ tgt->eaddr.a6 = c->ip6.addr_seen;
+ tgt->oaddr = inany_any6;
+ }
+
+ /* Let the kernel pick source port */
+ tgt->oport = 0;
if (proto == IPPROTO_UDP)
/* But for UDP preserve the source port */
- tgt->fport = ini->eport;
-
- if (inany_v4(&ini->eaddr))
- tgt->eaddr = inany_loopback4;
- else
- tgt->eaddr = inany_loopback6;
+ tgt->oport = ini->eport;
return PIF_SPLICE;
}
- tgt->faddr = ini->eaddr;
- tgt->fport = ini->eport;
-
- if (inany_is_loopback4(&tgt->faddr) ||
- inany_is_unspecified4(&tgt->faddr) ||
- inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) {
- tgt->faddr = inany_from_v4(c->ip4.gw);
- } else if (inany_is_loopback6(&tgt->faddr) ||
- inany_equals6(&tgt->faddr, &c->ip6.addr_seen) ||
- inany_equals6(&tgt->faddr, &c->ip6.addr)) {
- if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
- tgt->faddr.a6 = c->ip6.gw;
- else
- tgt->faddr.a6 = c->ip6.addr_ll;
+ if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback) &&
+ inany_equals4(&ini->eaddr, &in4addr_loopback)) {
+ /* Specifically 127.0.0.1, not 127.0.0.0/8 */
+ tgt->oaddr = inany_from_v4(c->ip4.map_host_loopback);
+ } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback) &&
+ inany_equals6(&ini->eaddr, &in6addr_loopback)) {
+ tgt->oaddr.a6 = c->ip6.map_host_loopback;
+ } else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
+ inany_equals4(&ini->eaddr, &c->ip4.addr)) {
+ tgt->oaddr = inany_from_v4(c->ip4.map_guest_addr);
+ } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
+ inany_equals6(&ini->eaddr, &c->ip6.addr)) {
+ tgt->oaddr.a6 = c->ip6.map_guest_addr;
+ } else if (!fwd_guest_accessible(c, &ini->eaddr)) {
+ if (inany_v4(&ini->eaddr)) {
+ if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.our_tap_addr))
+ /* No source address we can use */
+ return PIF_NONE;
+ tgt->oaddr = inany_from_v4(c->ip4.our_tap_addr);
+ } else {
+ tgt->oaddr.a6 = c->ip6.our_tap_ll;
+ }
+ } else {
+ tgt->oaddr = ini->eaddr;
}
+ tgt->oport = ini->eport;
- if (inany_v4(&tgt->faddr)) {
+ if (inany_v4(&tgt->oaddr)) {
tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
} else {
- if (inany_is_linklocal6(&tgt->faddr))
+ if (inany_is_linklocal6(&tgt->oaddr))
tgt->eaddr.a6 = c->ip6.addr_ll_seen;
else
tgt->eaddr.a6 = c->ip6.addr_seen;