diff options
Diffstat (limited to 'fwd.c')
-rw-r--r-- | fwd.c | 275 |
1 files changed, 229 insertions, 46 deletions
@@ -27,6 +27,80 @@ #include "lineread.h" #include "flow_table.h" +/* Empheral port range: values from RFC 6335 */ +static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14); +static in_port_t fwd_ephemeral_max = NUM_PORTS - 1; + +#define PORT_RANGE_SYSCTL "/proc/sys/net/ipv4/ip_local_port_range" + +/** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral + * + * Work out what ports the host thinks are emphemeral and record it for later + * use by fwd_port_is_ephemeral(). If we're unable to probe, assume the range + * recommended by RFC 6335. + */ +void fwd_probe_ephemeral(void) +{ + char *line, *tab, *end; + struct lineread lr; + long min, max; + ssize_t len; + int fd; + + fd = open(PORT_RANGE_SYSCTL, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + warn_perror("Unable to open %s", PORT_RANGE_SYSCTL); + return; + } + + lineread_init(&lr, fd); + len = lineread_get(&lr, &line); + close(fd); + + if (len < 0) + goto parse_err; + + tab = strchr(line, '\t'); + if (!tab) + goto parse_err; + *tab = '\0'; + + errno = 0; + min = strtol(line, &end, 10); + if (*end || errno) + goto parse_err; + + errno = 0; + max = strtol(tab + 1, &end, 10); + if (*end || errno) + goto parse_err; + + if (min < 0 || min >= (long)NUM_PORTS || + max < 0 || max >= (long)NUM_PORTS) + goto parse_err; + + fwd_ephemeral_min = min; + fwd_ephemeral_max = max; + + return; + +parse_err: + warn("Unable to parse %s", PORT_RANGE_SYSCTL); +} + +/** + * fwd_port_is_ephemeral() - Is port number ephemeral? + * @port: Port number + * + * Return: true if @port is ephemeral, that is may be allocated by the kernel as + * a local port for outgoing connections or datagrams, but should not be + * used for binding services to. + */ +bool fwd_port_is_ephemeral(in_port_t port) +{ + return (port >= fwd_ephemeral_min) && (port <= fwd_ephemeral_max); +} + /* See enum in kernel's include/net/tcp_states.h */ #define UDP_LISTEN 0x07 #define TCP_LISTEN 0x0a @@ -167,7 +241,86 @@ void fwd_scan_ports_init(struct ctx *c) static bool is_dns_flow(uint8_t proto, const struct flowside *ini) { return ((proto == IPPROTO_UDP) || (proto == IPPROTO_TCP)) && - ((ini->fport == 53) || (ini->fport == 853)); + ((ini->oport == 53) || (ini->oport == 853)); +} + +/** + * fwd_guest_accessible4() - Is IPv4 address guest-accessible + * @c: Execution context + * @addr: Host visible IPv4 address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible4(const struct ctx *c, + const struct in_addr *addr) +{ + if (IN4_IS_ADDR_LOOPBACK(addr)) + return false; + + /* In socket interfaces 0.0.0.0 generally means "any" or unspecified, + * however on the wire it can mean "this host on this network". Since + * that has a different meaning for host and guest, we can't let it + * through untranslated. + */ + if (IN4_IS_ADDR_UNSPECIFIED(addr)) + return false; + + /* For IPv4, addr_seen is initialised to addr, so is always a valid + * address + */ + if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr) || + IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen)) + return false; + + return true; +} + +/** + * fwd_guest_accessible6() - Is IPv6 address guest-accessible + * @c: Execution context + * @addr: Host visible IPv6 address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible6(const struct ctx *c, + const struct in6_addr *addr) +{ + if (IN6_IS_ADDR_LOOPBACK(addr)) + return false; + + if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr)) + return false; + + /* For IPv6, addr_seen starts unspecified, because we don't know what LL + * address the guest will take until we see it. Only check against it + * if it has been set to a real address. + */ + if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_seen) && + IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr_seen)) + return false; + + return true; +} + +/** + * fwd_guest_accessible() - Is IPv[46] address guest-accessible + * @c: Execution context + * @addr: Host visible IPv[46] address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible(const struct ctx *c, + const union inany_addr *addr) +{ + const struct in_addr *a4 = inany_v4(addr); + + if (a4) + return fwd_guest_accessible4(c, a4); + + return fwd_guest_accessible6(c, &addr->a6); } /** @@ -184,33 +337,37 @@ uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto, const struct flowside *ini, struct flowside *tgt) { if (is_dns_flow(proto, ini) && - inany_equals4(&ini->faddr, &c->ip4.dns_match)) + inany_equals4(&ini->oaddr, &c->ip4.dns_match)) tgt->eaddr = inany_from_v4(c->ip4.dns_host); else if (is_dns_flow(proto, ini) && - inany_equals6(&ini->faddr, &c->ip6.dns_match)) + inany_equals6(&ini->oaddr, &c->ip6.dns_match)) tgt->eaddr.a6 = c->ip6.dns_host; - else if (!c->no_map_gw && inany_equals4(&ini->faddr, &c->ip4.gw)) + else if (inany_equals4(&ini->oaddr, &c->ip4.map_host_loopback)) tgt->eaddr = inany_loopback4; - else if (!c->no_map_gw && inany_equals6(&ini->faddr, &c->ip6.gw)) + else if (inany_equals6(&ini->oaddr, &c->ip6.map_host_loopback)) tgt->eaddr = inany_loopback6; + else if (inany_equals4(&ini->oaddr, &c->ip4.map_guest_addr)) + tgt->eaddr = inany_from_v4(c->ip4.addr); + else if (inany_equals6(&ini->oaddr, &c->ip6.map_guest_addr)) + tgt->eaddr.a6 = c->ip6.addr; else - tgt->eaddr = ini->faddr; + tgt->eaddr = ini->oaddr; - tgt->eport = ini->fport; + tgt->eport = ini->oport; /* The relevant addr_out controls the host side source address. This * may be unspecified, which allows the kernel to pick an address. */ if (inany_v4(&tgt->eaddr)) - tgt->faddr = inany_from_v4(c->ip4.addr_out); + tgt->oaddr = inany_from_v4(c->ip4.addr_out); else - tgt->faddr.a6 = c->ip6.addr_out; + tgt->oaddr.a6 = c->ip6.addr_out; /* Let the kernel pick a host side source port */ - tgt->fport = 0; + tgt->oport = 0; if (proto == IPPROTO_UDP) { /* But for UDP we preserve the source port */ - tgt->fport = ini->eport; + tgt->oport = ini->eport; } return PIF_HOST; @@ -230,13 +387,13 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto, const struct flowside *ini, struct flowside *tgt) { if (!inany_is_loopback(&ini->eaddr) || - (!inany_is_loopback(&ini->faddr) && !inany_is_unspecified(&ini->faddr))) { + (!inany_is_loopback(&ini->oaddr) && !inany_is_unspecified(&ini->oaddr))) { char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu", pif_name(PIF_SPLICE), inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport); + inany_ntop(&ini->oaddr, fstr, sizeof(fstr)), ini->oport); return PIF_NONE; } @@ -248,20 +405,20 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto, /* Preserve the specific loopback adddress used, but let the kernel pick * a source port on the target side */ - tgt->faddr = ini->eaddr; - tgt->fport = 0; + tgt->oaddr = ini->eaddr; + tgt->oport = 0; - tgt->eport = ini->fport; + tgt->eport = ini->oport; if (proto == IPPROTO_TCP) tgt->eport += c->tcp.fwd_out.delta[tgt->eport]; else if (proto == IPPROTO_UDP) tgt->eport += c->udp.fwd_out.delta[tgt->eport]; /* Let the kernel pick a host side source port */ - tgt->fport = 0; + tgt->oport = 0; if (proto == IPPROTO_UDP) /* But for UDP preserve the source port */ - tgt->fport = ini->eport; + tgt->oport = ini->eport; return PIF_HOST; } @@ -280,53 +437,79 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto, const struct flowside *ini, struct flowside *tgt) { /* Common for spliced and non-spliced cases */ - tgt->eport = ini->fport; + tgt->eport = ini->oport; if (proto == IPPROTO_TCP) tgt->eport += c->tcp.fwd_in.delta[tgt->eport]; else if (proto == IPPROTO_UDP) tgt->eport += c->udp.fwd_in.delta[tgt->eport]; - if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) && + if (!c->no_splice && inany_is_loopback(&ini->eaddr) && (proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { /* spliceable */ - /* Preserve the specific loopback adddress used, but let the - * kernel pick a source port on the target side + /* The traffic will go over the guest's 'lo' interface, but by + * default use its external address, so we don't inadvertently + * expose services that listen only on the guest's loopback + * address. That can be overridden by --host-lo-to-ns-lo which + * will instead forward to the loopback address in the guest. + * + * In either case, let the kernel pick the source address to + * match. */ - tgt->faddr = ini->eaddr; - tgt->fport = 0; + if (inany_v4(&ini->eaddr)) { + if (c->host_lo_to_ns_lo) + tgt->eaddr = inany_loopback4; + else + tgt->eaddr = inany_from_v4(c->ip4.addr_seen); + tgt->oaddr = inany_any4; + } else { + if (c->host_lo_to_ns_lo) + tgt->eaddr = inany_loopback6; + else + tgt->eaddr.a6 = c->ip6.addr_seen; + tgt->oaddr = inany_any6; + } + + /* Let the kernel pick source port */ + tgt->oport = 0; if (proto == IPPROTO_UDP) /* But for UDP preserve the source port */ - tgt->fport = ini->eport; - - if (inany_v4(&ini->eaddr)) - tgt->eaddr = inany_loopback4; - else - tgt->eaddr = inany_loopback6; + tgt->oport = ini->eport; return PIF_SPLICE; } - tgt->faddr = ini->eaddr; - tgt->fport = ini->eport; - - if (inany_is_loopback4(&tgt->faddr) || - inany_is_unspecified4(&tgt->faddr) || - inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) { - tgt->faddr = inany_from_v4(c->ip4.gw); - } else if (inany_is_loopback6(&tgt->faddr) || - inany_equals6(&tgt->faddr, &c->ip6.addr_seen) || - inany_equals6(&tgt->faddr, &c->ip6.addr)) { - if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) - tgt->faddr.a6 = c->ip6.gw; - else - tgt->faddr.a6 = c->ip6.addr_ll; + if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback) && + inany_equals4(&ini->eaddr, &in4addr_loopback)) { + /* Specifically 127.0.0.1, not 127.0.0.0/8 */ + tgt->oaddr = inany_from_v4(c->ip4.map_host_loopback); + } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback) && + inany_equals6(&ini->eaddr, &in6addr_loopback)) { + tgt->oaddr.a6 = c->ip6.map_host_loopback; + } else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) && + inany_equals4(&ini->eaddr, &c->ip4.addr)) { + tgt->oaddr = inany_from_v4(c->ip4.map_guest_addr); + } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) && + inany_equals6(&ini->eaddr, &c->ip6.addr)) { + tgt->oaddr.a6 = c->ip6.map_guest_addr; + } else if (!fwd_guest_accessible(c, &ini->eaddr)) { + if (inany_v4(&ini->eaddr)) { + if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.our_tap_addr)) + /* No source address we can use */ + return PIF_NONE; + tgt->oaddr = inany_from_v4(c->ip4.our_tap_addr); + } else { + tgt->oaddr.a6 = c->ip6.our_tap_ll; + } + } else { + tgt->oaddr = ini->eaddr; } + tgt->oport = ini->eport; - if (inany_v4(&tgt->faddr)) { + if (inany_v4(&tgt->oaddr)) { tgt->eaddr = inany_from_v4(c->ip4.addr_seen); } else { - if (inany_is_linklocal6(&tgt->faddr)) + if (inany_is_linklocal6(&tgt->oaddr)) tgt->eaddr.a6 = c->ip6.addr_ll_seen; else tgt->eaddr.a6 = c->ip6.addr_seen; |