about git code bugs lists chat
path: root/fwd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fwd.c')
-rw-r--r-- fwd.c 637
1 files changed, 553 insertions, 84 deletions
diff --git a/fwd.c b/fwd.c
index dea36f6..44a0e10 100644
--- a/fwd.c
+++ b/fwd.c
@@ -26,6 +26,292 @@
#include "passt.h"
#include "lineread.h"
#include "flow_table.h"
+#include "netlink.h"
+#include "arp.h"
+#include "ndp.h"
+
+/* Ephemeral port range: values from RFC 6335
+ *
+ * These are the defaults (49152 to NUM_PORTS - 1); fwd_probe_ephemeral()
+ * overwrites them if it manages to read the range configured on the host.
+ */
+static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14);
+static in_port_t fwd_ephemeral_max = NUM_PORTS - 1;
+
+/* File read by fwd_probe_ephemeral() to learn the host's ephemeral range */
+#define PORT_RANGE_SYSCTL "/proc/sys/net/ipv4/ip_local_port_range"
+
+/* NEIGH_TABLE_SLOTS: number of hash slots in the neighbour table
+ * NEIGH_TABLE_SIZE:  number of preallocated neighbour table entries
+ */
+#define NEIGH_TABLE_SLOTS 1024
+#define NEIGH_TABLE_SIZE (NEIGH_TABLE_SLOTS / 2)
+static_assert((NEIGH_TABLE_SLOTS & (NEIGH_TABLE_SLOTS - 1)) == 0,
+ "NEIGH_TABLE_SLOTS must be a power of two");
+
+/**
+ * struct neigh_table_entry - Entry in the ARP/NDP table
+ * @next: Next entry chained in the same hash slot, or next entry in the
+ *        free list while this entry is unused
+ * @addr: IP address of represented host (lookup key)
+ * @mac: MAC address of represented host
+ * @permanent: Entry cannot be altered or freed by notification:
+ *             fwd_neigh_table_update() leaves its MAC address untouched and
+ *             fwd_neigh_table_free() doesn't release it
+ */
+struct neigh_table_entry {
+ struct neigh_table_entry *next;
+ union inany_addr addr;
+ uint8_t mac[ETH_ALEN];
+ bool permanent;
+};
+
+/**
+ * struct neigh_table - Cache of ARP/NDP table contents
+ * @entries: Statically sized pool of entries, plugged into the hash slots
+ *           when allocated
+ * @slots: Hash table slots, each the head of a singly linked chain of
+ *         entries (NULL if empty)
+ * @free: Linked list of unused entries, NULL once the pool is exhausted
+ */
+struct neigh_table {
+ struct neigh_table_entry entries[NEIGH_TABLE_SIZE];
+ struct neigh_table_entry *slots[NEIGH_TABLE_SLOTS];
+ struct neigh_table_entry *free;
+};
+
+/* The single neighbour table instance, set up by fwd_neigh_table_init() */
+static struct neigh_table neigh_table;
+
+/**
+ * neigh_table_slot() - Hash an address to a slot index of the neighbour table
+ * @c:		Execution context (provides the hash secret)
+ * @key:	The key to be used for the hash
+ *
+ * Return: index into neigh_table.slots[], in the range
+ *	   [0, NEIGH_TABLE_SLOTS)
+ */
+static size_t neigh_table_slot(const struct ctx *c,
+			       const union inany_addr *key)
+{
+	struct siphash_state st = SIPHASH_INIT(c->hash_secret);
+	uint32_t hash;
+
+	inany_siphash_feed(&st, key);
+	hash = siphash_final(&st, sizeof(*key), 0);
+
+	/* The slot array has NEIGH_TABLE_SLOTS elements, and that count is
+	 * asserted to be a power of two: mask with it, so that keys are spread
+	 * over all the slots rather than just the first NEIGH_TABLE_SIZE ones.
+	 */
+	return (size_t)hash & (NEIGH_TABLE_SLOTS - 1);
+}
+
+/**
+ * fwd_neigh_table_find() - Look up a neighbour table entry by address
+ * @c:		Execution context
+ * @addr:	Neighbour address to be used as key for the lookup
+ *
+ * Return: pointer to the entry whose address equals @addr, NULL if the slot
+ *	   chain for @addr holds no such entry
+ */
+static struct neigh_table_entry *fwd_neigh_table_find(const struct ctx *c,
+						      const union inany_addr *addr)
+{
+	struct neigh_table_entry *cur;
+
+	/* Walk the chain hanging off the slot this address hashes to */
+	for (cur = neigh_table.slots[neigh_table_slot(c, addr)]; cur;
+	     cur = cur->next) {
+		if (inany_equals(&cur->addr, addr))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/**
+ * fwd_neigh_table_update() - Allocate or update neighbour table entry
+ * @c:		Execution context
+ * @addr:	IP address: selects the insertion slot and is stored in entry
+ * @mac:	The MAC address associated with the neighbour address
+ * @permanent:	Created entry cannot be altered or freed
+ *
+ * If an entry for @addr exists already, only its MAC address is refreshed
+ * (and only if the entry isn't permanent). Otherwise, an entry is taken from
+ * the free list, and the new neighbour is announced on the tap interface,
+ * unless @mac is our own tap MAC address.
+ */
+void fwd_neigh_table_update(const struct ctx *c, const union inany_addr *addr,
+			    const uint8_t *mac, bool permanent)
+{
+	struct neigh_table_entry *entry = fwd_neigh_table_find(c, addr);
+	struct neigh_table_entry **head;
+	const struct in_addr *a4;
+
+	if (entry) {
+		/* Known neighbour: MAC address might change sometimes */
+		if (!entry->permanent)
+			memcpy(entry->mac, mac, ETH_ALEN);
+		return;
+	}
+
+	entry = neigh_table.free;
+	if (!entry) {
+		debug("Failed to allocate neighbour table entry");
+		return;
+	}
+	neigh_table.free = entry->next;
+
+	/* Push the new entry at the head of its slot chain */
+	head = &neigh_table.slots[neigh_table_slot(c, addr)];
+	entry->next = *head;
+	*head = entry;
+
+	entry->addr = *addr;
+	memcpy(entry->mac, mac, ETH_ALEN);
+	entry->permanent = permanent;
+
+	/* Nothing to announce for addresses we answer for ourselves */
+	if (memcmp(mac, c->our_tap_mac, ETH_ALEN) == 0)
+		return;
+
+	a4 = inany_v4(addr);
+	if (a4)
+		arp_announce(c, a4, entry->mac);
+	else
+		ndp_unsolicited_na(c, &addr->a6);
+}
+
+/**
+ * fwd_neigh_table_free() - Unlink an entry from its slot, return it to free list
+ * @c:		Execution context
+ * @addr:	IP address used to find the slot for the entry
+ *
+ * Does nothing if there's no entry for @addr, or if the entry is permanent.
+ */
+void fwd_neigh_table_free(const struct ctx *c, const union inany_addr *addr)
+{
+	struct neigh_table_entry **link, *victim;
+
+	/* @link always points at the pointer that references the current
+	 * candidate, so unlinking needs no special case for the slot head
+	 */
+	link = &neigh_table.slots[neigh_table_slot(c, addr)];
+	while (*link && !inany_equals(&(*link)->addr, addr))
+		link = &(*link)->next;
+
+	victim = *link;
+	if (!victim || victim->permanent)
+		return;
+
+	*link = victim->next;
+	victim->next = neigh_table.free;
+	neigh_table.free = victim;
+
+	memset(&victim->addr, 0, sizeof(victim->addr));
+	memset(victim->mac, 0, sizeof(victim->mac));
+}
+
+/**
+ * fwd_neigh_mac_get() - Look up MAC address in the ARP/NDP table
+ * @c:		Execution context
+ * @addr:	Neighbour IP address used as lookup key
+ * @mac:	Buffer for returned MAC address, ETH_ALEN bytes
+ *
+ * If @addr has no entry, fall back to the entry for the guest-side gateway of
+ * the same IP version and, failing that too, to our own tap MAC address.
+ */
+void fwd_neigh_mac_get(const struct ctx *c, const union inany_addr *addr,
+		       uint8_t *mac)
+{
+	const struct neigh_table_entry *hit = fwd_neigh_table_find(c, addr);
+	const void *src = c->our_tap_mac;
+
+	if (!hit) {
+		union inany_addr gw;
+
+		if (inany_v4(addr))
+			gw = inany_from_v4(c->ip4.guest_gw);
+		else
+			gw.a6 = c->ip6.guest_gw;
+
+		hit = fwd_neigh_table_find(c, &gw);
+	}
+
+	if (hit)
+		src = hit->mac;
+
+	memcpy(mac, src, ETH_ALEN);
+}
+
+/**
+ * fwd_neigh_table_init() - Initialize the neighbour table
+ * @c:		Execution context
+ *
+ * Clear the table, put all the entries on the free list, then add permanent
+ * entries, carrying our own tap MAC address, for each configured
+ * (not unspecified) map_host_loopback and map_guest_addr address.
+ */
+void fwd_neigh_table_init(const struct ctx *c)
+{
+	union inany_addr mhl = inany_from_v4(c->ip4.map_host_loopback);
+	union inany_addr mga = inany_from_v4(c->ip4.map_guest_addr);
+	struct neigh_table *t = &neigh_table;
+	size_t i;
+
+	memset(t, 0, sizeof(*t));
+
+	/* Thread every preallocated entry onto the free list */
+	for (i = 0; i < NEIGH_TABLE_SIZE; i++) {
+		struct neigh_table_entry *e = &t->entries[i];
+
+		e->next = t->free;
+		t->free = e;
+	}
+
+	/* Blocker entries to stop events from hosts using these addresses */
+	if (!inany_is_unspecified4(&mhl))
+		fwd_neigh_table_update(c, &mhl, c->our_tap_mac, true);
+
+	if (!inany_is_unspecified4(&mga))
+		fwd_neigh_table_update(c, &mga, c->our_tap_mac, true);
+
+	/* Assign through the IPv6 member instead of casting the address of a
+	 * const struct in6_addr to a non-const union pointer
+	 */
+	mhl.a6 = c->ip6.map_host_loopback;
+	mga.a6 = c->ip6.map_guest_addr;
+
+	if (!inany_is_unspecified6(&mhl))
+		fwd_neigh_table_update(c, &mhl, c->our_tap_mac, true);
+
+	if (!inany_is_unspecified6(&mga))
+		fwd_neigh_table_update(c, &mga, c->our_tap_mac, true);
+}
+
+/**
+ * fwd_probe_ephemeral() - Determine what ports this host considers ephemeral
+ *
+ * Work out what ports the host thinks are ephemeral and record it for later
+ * use by fwd_port_is_ephemeral(). If we're unable to probe, or if the values
+ * we read make no sense, keep the range recommended by RFC 6335.
+ */
+void fwd_probe_ephemeral(void)
+{
+	char *line, *tab, *end;
+	struct lineread lr;
+	long min, max;
+	ssize_t len;
+	int fd;
+
+	fd = open(PORT_RANGE_SYSCTL, O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		warn_perror("Unable to open %s", PORT_RANGE_SYSCTL);
+		return;
+	}
+
+	lineread_init(&lr, fd);
+	len = lineread_get(&lr, &line);
+	close(fd);
+
+	/* Nothing read: there's nothing we could parse */
+	if (len <= 0)
+		goto parse_err;
+
+	/* Expected format: "<min>\t<max>" */
+	tab = strchr(line, '\t');
+	if (!tab)
+		goto parse_err;
+	*tab = '\0';
+
+	/* end == start of field means strtol() consumed no digits at all: an
+	 * empty field must not be silently taken as 0
+	 */
+	errno = 0;
+	min = strtol(line, &end, 10);
+	if (end == line || *end || errno)
+		goto parse_err;
+
+	errno = 0;
+	max = strtol(tab + 1, &end, 10);
+	if (end == tab + 1 || *end || errno)
+		goto parse_err;
+
+	if (min < 0 || min >= (long)NUM_PORTS ||
+	    max < 0 || max >= (long)NUM_PORTS)
+		goto parse_err;
+
+	/* An inverted range would make no port ephemeral at all */
+	if (min > max)
+		goto parse_err;
+
+	fwd_ephemeral_min = min;
+	fwd_ephemeral_max = max;
+
+	return;
+
+parse_err:
+	warn("Unable to parse %s", PORT_RANGE_SYSCTL);
+}
+
+/**
+ * fwd_port_is_ephemeral() - Check a port number against the ephemeral range
+ * @port:	Port number
+ *
+ * Return: true if @port is ephemeral, that is, it may be allocated by the
+ *	   kernel as a local port for outgoing connections or datagrams, but
+ *	   should not be used for binding services to; false otherwise
+ */
+bool fwd_port_is_ephemeral(in_port_t port)
+{
+	if (port < fwd_ephemeral_min)
+		return false;
+
+	return port <= fwd_ephemeral_max;
+}
/* See enum in kernel's include/net/tcp_states.h */
#define UDP_LISTEN 0x07
@@ -36,13 +322,11 @@
* @fd: fd for relevant /proc/net file
* @lstate: Code for listening state to scan for
* @map: Bitmap where numbers of ports in listening state will be set
- * @exclude: Bitmap of ports to exclude from setting (and clear)
*
* #syscalls:pasta lseek
* #syscalls:pasta ppc64le:_llseek ppc64:_llseek arm:_llseek
*/
-static void procfs_scan_listen(int fd, unsigned int lstate,
- uint8_t *map, const uint8_t *exclude)
+static void procfs_scan_listen(int fd, unsigned int lstate, uint8_t *map)
{
struct lineread lr;
unsigned long port;
@@ -67,56 +351,72 @@ static void procfs_scan_listen(int fd, unsigned int lstate,
if (state != lstate)
continue;
- if (bitmap_isset(exclude, port))
- bitmap_clear(map, port);
- else
- bitmap_set(map, port);
+ bitmap_set(map, port);
}
}
/**
* fwd_scan_ports_tcp() - Scan /proc to update TCP forwarding map
* @fwd: Forwarding information to update
- * @rev: Forwarding information for the reverse direction
+ * @exclude: Ports to _not_ forward
*/
-void fwd_scan_ports_tcp(struct fwd_ports *fwd, const struct fwd_ports *rev)
+static void fwd_scan_ports_tcp(struct fwd_ports *fwd, const uint8_t *exclude)
{
+ if (fwd->mode != FWD_AUTO)
+ return;
+
memset(fwd->map, 0, PORT_BITMAP_SIZE);
- procfs_scan_listen(fwd->scan4, TCP_LISTEN, fwd->map, rev->map);
- procfs_scan_listen(fwd->scan6, TCP_LISTEN, fwd->map, rev->map);
+ procfs_scan_listen(fwd->scan4, TCP_LISTEN, fwd->map);
+ procfs_scan_listen(fwd->scan6, TCP_LISTEN, fwd->map);
+ bitmap_and_not(fwd->map, PORT_BITMAP_SIZE, fwd->map, exclude);
}
/**
* fwd_scan_ports_udp() - Scan /proc to update UDP forwarding map
* @fwd: Forwarding information to update
- * @rev: Forwarding information for the reverse direction
* @tcp_fwd: Corresponding TCP forwarding information
- * @tcp_rev: TCP forwarding information for the reverse direction
+ * @exclude: Ports to _not_ forward
*/
-void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev,
- const struct fwd_ports *tcp_fwd,
- const struct fwd_ports *tcp_rev)
+static void fwd_scan_ports_udp(struct fwd_ports *fwd,
+ const struct fwd_ports *tcp_fwd,
+ const uint8_t *exclude)
{
- uint8_t exclude[PORT_BITMAP_SIZE];
-
- bitmap_or(exclude, PORT_BITMAP_SIZE, rev->map, tcp_rev->map);
+ if (fwd->mode != FWD_AUTO)
+ return;
memset(fwd->map, 0, PORT_BITMAP_SIZE);
- procfs_scan_listen(fwd->scan4, UDP_LISTEN, fwd->map, exclude);
- procfs_scan_listen(fwd->scan6, UDP_LISTEN, fwd->map, exclude);
+ procfs_scan_listen(fwd->scan4, UDP_LISTEN, fwd->map);
+ procfs_scan_listen(fwd->scan6, UDP_LISTEN, fwd->map);
/* Also forward UDP ports with the same numbers as bound TCP ports.
* This is useful for a handful of protocols (e.g. iperf3) where a TCP
* control port is used to set up transfers on a corresponding UDP
* port.
- *
- * This means we need to skip numbers of TCP ports bound on the other
- * side, too. Otherwise, we would detect corresponding UDP ports as
- * bound and try to forward them from the opposite side, but it's
- * already us handling them.
*/
- procfs_scan_listen(tcp_fwd->scan4, TCP_LISTEN, fwd->map, exclude);
- procfs_scan_listen(tcp_fwd->scan6, TCP_LISTEN, fwd->map, exclude);
+ procfs_scan_listen(tcp_fwd->scan4, TCP_LISTEN, fwd->map);
+ procfs_scan_listen(tcp_fwd->scan6, TCP_LISTEN, fwd->map);
+
+ bitmap_and_not(fwd->map, PORT_BITMAP_SIZE, fwd->map, exclude);
+}
+
+/**
+ * fwd_scan_ports() - Scan automatic port forwarding information
+ * @c: Execution context
+ *
+ * Rescan all four forwarding maps (TCP and UDP, inbound and outbound). Ports
+ * set in the map for one direction are excluded from the scan result for the
+ * opposite direction of the same protocol.
+ */
+static void fwd_scan_ports(struct ctx *c)
+{
+ uint8_t excl_tcp_out[PORT_BITMAP_SIZE], excl_udp_out[PORT_BITMAP_SIZE];
+ uint8_t excl_tcp_in[PORT_BITMAP_SIZE], excl_udp_in[PORT_BITMAP_SIZE];
+
+ /* The scans below clear and rebuild the maps they are given (in
+  * FWD_AUTO mode), so take copies of all the maps first: every exclusion
+  * set then reflects the state from before this scan pass, regardless of
+  * the order of the scans.
+  */
+ memcpy(excl_tcp_out, c->tcp.fwd_in.map, sizeof(excl_tcp_out));
+ memcpy(excl_tcp_in, c->tcp.fwd_out.map, sizeof(excl_tcp_in));
+ memcpy(excl_udp_out, c->udp.fwd_in.map, sizeof(excl_udp_out));
+ memcpy(excl_udp_in, c->udp.fwd_out.map, sizeof(excl_udp_in));
+
+ fwd_scan_ports_tcp(&c->tcp.fwd_out, excl_tcp_out);
+ fwd_scan_ports_tcp(&c->tcp.fwd_in, excl_tcp_in);
+ /* UDP scans also look at the TCP listening sockets of the same side */
+ fwd_scan_ports_udp(&c->udp.fwd_out, &c->tcp.fwd_out, excl_udp_out);
+ fwd_scan_ports_udp(&c->udp.fwd_in, &c->tcp.fwd_in, excl_udp_in);
}
/**
@@ -135,25 +435,46 @@ void fwd_scan_ports_init(struct ctx *c)
if (c->tcp.fwd_in.mode == FWD_AUTO) {
c->tcp.fwd_in.scan4 = open_in_ns(c, "/proc/net/tcp", flags);
c->tcp.fwd_in.scan6 = open_in_ns(c, "/proc/net/tcp6", flags);
- fwd_scan_ports_tcp(&c->tcp.fwd_in, &c->tcp.fwd_out);
}
if (c->udp.fwd_in.mode == FWD_AUTO) {
c->udp.fwd_in.scan4 = open_in_ns(c, "/proc/net/udp", flags);
c->udp.fwd_in.scan6 = open_in_ns(c, "/proc/net/udp6", flags);
- fwd_scan_ports_udp(&c->udp.fwd_in, &c->udp.fwd_out,
- &c->tcp.fwd_in, &c->tcp.fwd_out);
}
if (c->tcp.fwd_out.mode == FWD_AUTO) {
c->tcp.fwd_out.scan4 = open("/proc/net/tcp", flags);
c->tcp.fwd_out.scan6 = open("/proc/net/tcp6", flags);
- fwd_scan_ports_tcp(&c->tcp.fwd_out, &c->tcp.fwd_in);
}
if (c->udp.fwd_out.mode == FWD_AUTO) {
c->udp.fwd_out.scan4 = open("/proc/net/udp", flags);
c->udp.fwd_out.scan6 = open("/proc/net/udp6", flags);
- fwd_scan_ports_udp(&c->udp.fwd_out, &c->udp.fwd_in,
- &c->tcp.fwd_out, &c->tcp.fwd_in);
}
+ fwd_scan_ports(c);
+}
+
+/* Last time we scanned for open ports */
+static struct timespec scan_ports_run;
+
+/**
+ * fwd_scan_ports_timer() - Periodically refresh automatic port forwarding
+ * @c:		Execution context
+ * @now:	Current (monotonic) time
+ *
+ * Only acts in pasta mode, and only once FWD_PORT_SCAN_INTERVAL milliseconds
+ * have passed since the previous run: rescan the listening ports, then rebind
+ * forwarded ports for each enabled protocol.
+ */
+void fwd_scan_ports_timer(struct ctx *c, const struct timespec *now)
+{
+	if (c->mode != MODE_PASTA ||
+	    timespec_diff_ms(now, &scan_ports_run) < FWD_PORT_SCAN_INTERVAL)
+		return;
+
+	scan_ports_run = *now;
+	fwd_scan_ports(c);
+
+	if (!c->no_tcp)
+		tcp_port_rebind_all(c);
+
+	if (!c->no_udp)
+		udp_port_rebind_all(c);
}
/**
@@ -167,7 +488,110 @@ void fwd_scan_ports_init(struct ctx *c)
static bool is_dns_flow(uint8_t proto, const struct flowside *ini)
{
return ((proto == IPPROTO_UDP) || (proto == IPPROTO_TCP)) &&
- ((ini->fport == 53) || (ini->fport == 853));
+ ((ini->oport == 53) || (ini->oport == 853));
+}
+
+/**
+ * fwd_guest_accessible4() - Can the guest reach this IPv4 address as it is?
+ * @c:		Execution context
+ * @addr:	Host visible IPv4 address
+ *
+ * Return: true if @addr on the host is accessible to the guest without
+ *	   translation, false otherwise
+ */
+static bool fwd_guest_accessible4(const struct ctx *c,
+				  const struct in_addr *addr)
+{
+	/* Loopback is never reachable as such from the guest.
+	 *
+	 * In socket interfaces 0.0.0.0 generally means "any" or unspecified,
+	 * however on the wire it can mean "this host on this network". Since
+	 * that has a different meaning for host and guest, we can't let it
+	 * through untranslated either.
+	 */
+	if (IN4_IS_ADDR_LOOPBACK(addr) || IN4_IS_ADDR_UNSPECIFIED(addr))
+		return false;
+
+	/* The guest's own address, as assigned... */
+	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr))
+		return false;
+
+	/* ...or as observed. For IPv4, addr_seen is initialised to addr, so
+	 * it's always a valid address
+	 */
+	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
+		return false;
+
+	return true;
+}
+
+/**
+ * fwd_guest_accessible6() - Can the guest reach this IPv6 address as it is?
+ * @c:		Execution context
+ * @addr:	Host visible IPv6 address
+ *
+ * Return: true if @addr on the host is accessible to the guest without
+ *	   translation, false otherwise
+ */
+static bool fwd_guest_accessible6(const struct ctx *c,
+				  const struct in6_addr *addr)
+{
+	if (IN6_IS_ADDR_LOOPBACK(addr) ||
+	    IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr))
+		return false;
+
+	/* For IPv6, addr_seen starts unspecified, because we don't know what
+	 * LL address the guest will take until we see it: an unspecified
+	 * addr_seen must not be used for the comparison.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_seen))
+		return true;
+
+	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr_seen))
+		return false;
+
+	return true;
+}
+
+/**
+ * fwd_guest_accessible() - Can the guest reach this IPv[46] address as it is?
+ * @c:		Execution context
+ * @addr:	Host visible IPv[46] address
+ *
+ * Return: true if @addr on the host is accessible to the guest without
+ *	   translation, false otherwise
+ */
+static bool fwd_guest_accessible(const struct ctx *c,
+				 const union inany_addr *addr)
+{
+	const struct in_addr *v4 = inany_v4(addr);
+
+	/* Dispatch on IP version: inany_v4() gives NULL for IPv6 addresses */
+	return v4 ? fwd_guest_accessible4(c, v4)
+		  : fwd_guest_accessible6(c, &addr->a6);
+}
+
+/**
+ * nat_outbound() - Translate a destination address, TAP to HOST direction
+ * @c:		Execution context
+ * @addr:	Input address (as seen on TAP interface)
+ * @translated:	Output address (as seen on HOST interface)
+ *
+ * Only handles translations that depend *only* on the address. Anything
+ * related to specific ports or flows is handled elsewhere. Addresses that
+ * match none of the mapping rules are copied unchanged.
+ */
+static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
+			 union inany_addr *translated)
+{
+	/* Addresses mapped to the host's loopback */
+	if (inany_equals4(addr, &c->ip4.map_host_loopback)) {
+		*translated = inany_loopback4;
+		return;
+	}
+
+	if (inany_equals6(addr, &c->ip6.map_host_loopback)) {
+		*translated = inany_loopback6;
+		return;
+	}
+
+	/* Addresses mapped to the address the guest shares with the host */
+	if (inany_equals4(addr, &c->ip4.map_guest_addr)) {
+		*translated = inany_from_v4(c->ip4.addr);
+		return;
+	}
+
+	if (inany_equals6(addr, &c->ip6.map_guest_addr)) {
+		translated->a6 = c->ip6.addr;
+		return;
+	}
+
+	/* Anything else goes out untranslated */
+	*translated = *addr;
}
/**
@@ -184,33 +608,29 @@ uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt)
{
if (is_dns_flow(proto, ini) &&
- inany_equals4(&ini->faddr, &c->ip4.dns_match))
+ inany_equals4(&ini->oaddr, &c->ip4.dns_match))
tgt->eaddr = inany_from_v4(c->ip4.dns_host);
else if (is_dns_flow(proto, ini) &&
- inany_equals6(&ini->faddr, &c->ip6.dns_match))
+ inany_equals6(&ini->oaddr, &c->ip6.dns_match))
tgt->eaddr.a6 = c->ip6.dns_host;
- else if (!c->no_map_gw && inany_equals4(&ini->faddr, &c->ip4.gw))
- tgt->eaddr = inany_loopback4;
- else if (!c->no_map_gw && inany_equals6(&ini->faddr, &c->ip6.gw))
- tgt->eaddr = inany_loopback6;
else
- tgt->eaddr = ini->faddr;
+ nat_outbound(c, &ini->oaddr, &tgt->eaddr);
- tgt->eport = ini->fport;
+ tgt->eport = ini->oport;
/* The relevant addr_out controls the host side source address. This
* may be unspecified, which allows the kernel to pick an address.
*/
if (inany_v4(&tgt->eaddr))
- tgt->faddr = inany_from_v4(c->ip4.addr_out);
+ tgt->oaddr = inany_from_v4(c->ip4.addr_out);
else
- tgt->faddr.a6 = c->ip6.addr_out;
+ tgt->oaddr.a6 = c->ip6.addr_out;
/* Let the kernel pick a host side source port */
- tgt->fport = 0;
+ tgt->oport = 0;
if (proto == IPPROTO_UDP) {
/* But for UDP we preserve the source port */
- tgt->fport = ini->eport;
+ tgt->oport = ini->eport;
}
return PIF_HOST;
@@ -230,43 +650,81 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt)
{
if (!inany_is_loopback(&ini->eaddr) ||
- (!inany_is_loopback(&ini->faddr) && !inany_is_unspecified(&ini->faddr))) {
+ (!inany_is_loopback(&ini->oaddr) && !inany_is_unspecified(&ini->oaddr))) {
char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu",
pif_name(PIF_SPLICE),
inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport,
- inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport);
+ inany_ntop(&ini->oaddr, fstr, sizeof(fstr)), ini->oport);
return PIF_NONE;
}
- if (inany_v4(&ini->eaddr))
+ if (!inany_is_unspecified(&ini->oaddr))
+ tgt->eaddr = ini->oaddr;
+ else if (inany_v4(&ini->oaddr))
tgt->eaddr = inany_loopback4;
else
tgt->eaddr = inany_loopback6;
- /* Preserve the specific loopback adddress used, but let the kernel pick
+ /* Preserve the specific loopback address used, but let the kernel pick
* a source port on the target side
*/
- tgt->faddr = ini->eaddr;
- tgt->fport = 0;
+ tgt->oaddr = ini->eaddr;
+ tgt->oport = 0;
- tgt->eport = ini->fport;
+ tgt->eport = ini->oport;
if (proto == IPPROTO_TCP)
tgt->eport += c->tcp.fwd_out.delta[tgt->eport];
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_out.delta[tgt->eport];
/* Let the kernel pick a host side source port */
- tgt->fport = 0;
+ tgt->oport = 0;
if (proto == IPPROTO_UDP)
/* But for UDP preserve the source port */
- tgt->fport = ini->eport;
+ tgt->oport = ini->eport;
return PIF_HOST;
}
/**
+ * nat_inbound() - Translate a source address, HOST to TAP direction
+ * @c:		Execution context
+ * @addr:	Input address (as seen on HOST interface)
+ * @translated:	Output address (as seen on TAP interface)
+ *
+ * Only handles translations that depend *only* on the address. Anything
+ * related to specific ports or flows is handled elsewhere.
+ *
+ * Return: true on success, false if it couldn't translate the address, in
+ *	   which case @translated is left untouched
+ */
+bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
+		 union inany_addr *translated)
+{
+	/* Each mapping applies only if it's configured (not unspecified) */
+	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback) &&
+	    inany_equals4(addr, &in4addr_loopback)) {
+		/* Specifically 127.0.0.1, not 127.0.0.0/8 */
+		*translated = inany_from_v4(c->ip4.map_host_loopback);
+		return true;
+	}
+
+	if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback) &&
+	    inany_equals6(addr, &in6addr_loopback)) {
+		translated->a6 = c->ip6.map_host_loopback;
+		return true;
+	}
+
+	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
+	    inany_equals4(addr, &c->ip4.addr)) {
+		*translated = inany_from_v4(c->ip4.map_guest_addr);
+		return true;
+	}
+
+	if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
+	    inany_equals6(addr, &c->ip6.addr)) {
+		translated->a6 = c->ip6.map_guest_addr;
+		return true;
+	}
+
+	/* No mapping applies: pass through only what the guest can reach */
+	if (!fwd_guest_accessible(c, addr))
+		return false;
+
+	*translated = *addr;
+	return true;
+}
+
+/**
* fwd_nat_from_host() - Determine to forward a flow from the host interface
* @c: Execution context
* @proto: Protocol (IP L4 protocol number)
@@ -280,53 +738,64 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt)
{
/* Common for spliced and non-spliced cases */
- tgt->eport = ini->fport;
+ tgt->eport = ini->oport;
if (proto == IPPROTO_TCP)
tgt->eport += c->tcp.fwd_in.delta[tgt->eport];
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_in.delta[tgt->eport];
- if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) &&
+ if (!c->no_splice && inany_is_loopback(&ini->eaddr) &&
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
/* spliceable */
- /* Preserve the specific loopback adddress used, but let the
- * kernel pick a source port on the target side
+ /* The traffic will go over the guest's 'lo' interface, but by
+ * default use its external address, so we don't inadvertently
+ * expose services that listen only on the guest's loopback
+ * address. That can be overridden by --host-lo-to-ns-lo which
+ * will instead forward to the loopback address in the guest.
+ *
+ * In either case, let the kernel pick the source address to
+ * match.
*/
- tgt->faddr = ini->eaddr;
- tgt->fport = 0;
+ if (inany_v4(&ini->eaddr)) {
+ if (c->host_lo_to_ns_lo)
+ tgt->eaddr = inany_loopback4;
+ else
+ tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
+ tgt->oaddr = inany_any4;
+ } else {
+ if (c->host_lo_to_ns_lo)
+ tgt->eaddr = inany_loopback6;
+ else
+ tgt->eaddr.a6 = c->ip6.addr_seen;
+ tgt->oaddr = inany_any6;
+ }
+
+ /* Let the kernel pick source port */
+ tgt->oport = 0;
if (proto == IPPROTO_UDP)
/* But for UDP preserve the source port */
- tgt->fport = ini->eport;
-
- if (inany_v4(&ini->eaddr))
- tgt->eaddr = inany_loopback4;
- else
- tgt->eaddr = inany_loopback6;
+ tgt->oport = ini->eport;
return PIF_SPLICE;
}
- tgt->faddr = ini->eaddr;
- tgt->fport = ini->eport;
-
- if (inany_is_loopback4(&tgt->faddr) ||
- inany_is_unspecified4(&tgt->faddr) ||
- inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) {
- tgt->faddr = inany_from_v4(c->ip4.gw);
- } else if (inany_is_loopback6(&tgt->faddr) ||
- inany_equals6(&tgt->faddr, &c->ip6.addr_seen) ||
- inany_equals6(&tgt->faddr, &c->ip6.addr)) {
- if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
- tgt->faddr.a6 = c->ip6.gw;
- else
- tgt->faddr.a6 = c->ip6.addr_ll;
+ if (!nat_inbound(c, &ini->eaddr, &tgt->oaddr)) {
+ if (inany_v4(&ini->eaddr)) {
+ if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.our_tap_addr))
+ /* No source address we can use */
+ return PIF_NONE;
+ tgt->oaddr = inany_from_v4(c->ip4.our_tap_addr);
+ } else {
+ tgt->oaddr.a6 = c->ip6.our_tap_ll;
+ }
}
+ tgt->oport = ini->eport;
- if (inany_v4(&tgt->faddr)) {
+ if (inany_v4(&tgt->oaddr)) {
tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
} else {
- if (inany_is_linklocal6(&tgt->faddr))
+ if (inany_is_linklocal6(&tgt->oaddr))
tgt->eaddr.a6 = c->ip6.addr_ll_seen;
else
tgt->eaddr.a6 = c->ip6.addr_seen;