about | git | code | bugs | lists | chat
path: root/passt.h
diff options
context:
space:
mode:
Diffstat (limited to 'passt.h')
-rw-r--r-- passt.h 192
1 files changed, 103 insertions, 89 deletions
diff --git a/passt.h b/passt.h
index 76026f0..8693794 100644
--- a/passt.h
+++ b/passt.h
@@ -9,26 +9,6 @@
#define UNIX_SOCK_MAX 100
#define UNIX_SOCK_PATH "/tmp/passt_%i.socket"
-/**
- * struct tap_msg - Generic message descriptor for arrays of messages
- * @pkt_buf_offset: Offset from @pkt_buf
- * @len: Message length, with L2 headers
- */
-struct tap_msg {
- uint32_t pkt_buf_offset;
- uint16_t len;
-};
-
-/**
- * struct tap_l4_msg - Layer-4 message descriptor for protocol handlers
- * @pkt_buf_offset: Offset of message from @pkt_buf
- * @l4_len: Length of Layer-4 payload, host order
- */
-struct tap_l4_msg {
- uint32_t pkt_buf_offset;
- uint16_t l4_len;
-};
-
union epoll_ref;
#include <stdbool.h>
@@ -37,43 +17,23 @@ union epoll_ref;
#include "pif.h"
#include "packet.h"
+#include "siphash.h"
+#include "ip.h"
+#include "inany.h"
+#include "migrate.h"
#include "flow.h"
#include "icmp.h"
#include "fwd.h"
#include "tcp.h"
#include "udp.h"
+#include "vhost_user.h"
-/**
- * enum epoll_type - Different types of fds we poll over
+/* Default address for our end on the tap interface. Bit 0 of byte 0 must be 0
+ * (unicast) and bit 1 of byte 0 must be 1 (locally administered). Otherwise
+ * it's arbitrary.
*/
-enum epoll_type {
- /* Special value to indicate an invalid type */
- EPOLL_TYPE_NONE = 0,
- /* Connected TCP sockets */
- EPOLL_TYPE_TCP,
- /* Connected TCP sockets (spliced) */
- EPOLL_TYPE_TCP_SPLICE,
- /* Listening TCP sockets */
- EPOLL_TYPE_TCP_LISTEN,
- /* timerfds used for TCP timers */
- EPOLL_TYPE_TCP_TIMER,
- /* UDP sockets */
- EPOLL_TYPE_UDP,
- /* ICMP/ICMPv6 ping sockets */
- EPOLL_TYPE_PING,
- /* inotify fd watching for end of netns (pasta) */
- EPOLL_TYPE_NSQUIT_INOTIFY,
- /* timer fd watching for end of netns, fallback for inotify (pasta) */
- EPOLL_TYPE_NSQUIT_TIMER,
- /* tuntap character device */
- EPOLL_TYPE_TAP_PASTA,
- /* socket connected to qemu */
- EPOLL_TYPE_TAP_PASST,
- /* socket listening for qemu socket connections */
- EPOLL_TYPE_TAP_LISTEN,
-
- EPOLL_NUM_TYPES,
-};
+#define MAC_OUR_LAA \
+ ((uint8_t [ETH_ALEN]){0x9a, 0x55, 0x9a, 0x55, 0x9a, 0x55})
/**
* union epoll_ref - Breakdown of reference for epoll fd bookkeeping
@@ -85,6 +45,7 @@ enum epoll_type {
* @icmp: ICMP-specific reference part
* @data: Data handled by protocol handlers
* @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone
+ * @queue: vhost-user queue index for this fd
* @u64: Opaque reference for epoll_ctl() and epoll_wait()
*/
union epoll_ref {
@@ -97,9 +58,10 @@ union epoll_ref {
uint32_t flow;
flow_sidx_t flowside;
union tcp_listen_epoll_ref tcp_listen;
- union udp_epoll_ref udp;
+ union udp_listen_epoll_ref udp;
uint32_t data;
int nsdir_fd;
+ int queue;
};
};
uint64_t u64;
@@ -107,13 +69,9 @@ union epoll_ref {
static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
"epoll_ref must have same size as epoll_data");
-#define TAP_BUF_BYTES \
- ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
-#define TAP_BUF_FILL (TAP_BUF_BYTES - ETH_MAX_MTU - sizeof(uint32_t))
-#define TAP_MSGS \
- DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
+/* Large enough for ~128 maximum size frames */
+#define PKT_BUF_BYTES (8UL << 20)
-#define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0)
extern char pkt_buf [PKT_BUF_BYTES];
extern char *epoll_type_str[];
@@ -137,58 +95,89 @@ struct fqdn {
enum passt_modes {
MODE_PASST,
MODE_PASTA,
+ MODE_VU,
};
/**
* struct ip4_ctx - IPv4 execution context
- * @addr: IPv4 address for external, routable interface
+ * @addr: IPv4 address assigned to guest
* @addr_seen: Latest IPv4 address seen as source from tap
* @prefix_len: IPv4 prefix length (netmask)
- * @gw: Default IPv4 gateway, network order
- * @dns: DNS addresses for DHCP, zero-terminated, network order
- * @dns_match: Forward DNS query if sent to this address, network order
- * @dns_host: Use this DNS on the host for forwarding, network order
+ * @guest_gw: IPv4 gateway as seen by the guest
+ * @map_host_loopback: Outbound connections to this address are NATted to the
+ * host's 127.0.0.1
+ * @map_guest_addr: Outbound connections to this address are NATted to the
+ * guest's assigned address
+ * @dns: DNS addresses for DHCP, zero-terminated
+ * @dns_match: Forward DNS query if sent to this address
+ * @our_tap_addr: IPv4 address for passt's use on tap
+ * @dns_host: Use this DNS on the host for forwarding
* @addr_out: Optional source address for outbound traffic
* @ifname_out: Optional interface name to bind outbound sockets to
+ * @no_copy_routes: Don't copy all routes when configuring target namespace
+ * @no_copy_addrs: Don't copy all addresses when configuring namespace
*/
struct ip4_ctx {
+ /* PIF_TAP addresses */
struct in_addr addr;
struct in_addr addr_seen;
int prefix_len;
- struct in_addr gw;
+ struct in_addr guest_gw;
+ struct in_addr map_host_loopback;
+ struct in_addr map_guest_addr;
struct in_addr dns[MAXNS + 1];
struct in_addr dns_match;
- struct in_addr dns_host;
+ struct in_addr our_tap_addr;
+ /* PIF_HOST addresses */
+ struct in_addr dns_host;
struct in_addr addr_out;
+
char ifname_out[IFNAMSIZ];
+
+ bool no_copy_routes;
+ bool no_copy_addrs;
};
/**
* struct ip6_ctx - IPv6 execution context
- * @addr: IPv6 address for external, routable interface
- * @addr_ll: Link-local IPv6 address on external, routable interface
+ * @addr: IPv6 address assigned to guest
* @addr_seen: Latest IPv6 global/site address seen as source from tap
* @addr_ll_seen: Latest IPv6 link-local address seen as source from tap
- * @gw: Default IPv6 gateway
+ * @guest_gw: IPv6 gateway as seen by the guest
+ * @map_host_loopback: Outbound connections to this address are NATted to the
+ * host's [::1]
+ * @map_guest_addr: Outbound connections to this address are NATted to the
+ * guest's assigned address
* @dns: DNS addresses for DHCPv6 and NDP, zero-terminated
* @dns_match: Forward DNS query if sent to this address
+ * @our_tap_ll: Link-local IPv6 address for passt's use on tap
* @dns_host: Use this DNS on the host for forwarding
* @addr_out: Optional source address for outbound traffic
* @ifname_out: Optional interface name to bind outbound sockets to
+ * @no_copy_routes: Don't copy all routes when configuring target namespace
+ * @no_copy_addrs: Don't copy all addresses when configuring namespace
*/
struct ip6_ctx {
+ /* PIF_TAP addresses */
struct in6_addr addr;
- struct in6_addr addr_ll;
struct in6_addr addr_seen;
struct in6_addr addr_ll_seen;
- struct in6_addr gw;
+ struct in6_addr guest_gw;
+ struct in6_addr map_host_loopback;
+ struct in6_addr map_guest_addr;
struct in6_addr dns[MAXNS + 1];
struct in6_addr dns_match;
- struct in6_addr dns_host;
+ struct in6_addr our_tap_ll;
+ /* PIF_HOST addresses */
+ struct in6_addr dns_host;
struct in6_addr addr_out;
+
char ifname_out[IFNAMSIZ];
+
+ bool no_copy_routes;
+ bool no_copy_addrs;
};
#include <netinet/if_ether.h>
@@ -200,11 +189,12 @@ struct ip6_ctx {
* @trace: Enable tracing (extra debug) mode
* @quiet: Don't print informational messages
* @foreground: Run in foreground, don't log to stderr by default
- * @force_stderr: Force logging to stderr
* @nofile: Maximum number of open files (ulimit -n)
* @sock_path: Path for UNIX domain socket
+ * @repair_path: TCP_REPAIR helper path, can be "none", empty for default
* @pcap: Path for packet capture file
- * @pid_file: Path to PID file, empty string if not configured
+ * @pidfile: Path to PID file, empty string if not configured
+ * @pidfile_fd: File descriptor for PID file, -1 if none
* @pasta_netns_fd: File descriptor for network namespace in pasta mode
* @no_netns_quit: In pasta mode, don't exit if fs-bound namespace is gone
* @netns_base: Base name for fs-bound namespace, if any, in pasta mode
@@ -212,19 +202,21 @@ struct ip6_ctx {
* @epollfd: File descriptor for epoll instance
* @fd_tap_listen: File descriptor for listening AF_UNIX socket, if any
* @fd_tap: AF_UNIX socket, tuntap device, or pre-opened socket
- * @mac: Host MAC address
- * @mac_guest: MAC address of guest or namespace, seen or configured
+ * @fd_repair_listen: File descriptor for listening TCP_REPAIR socket, if any
+ * @fd_repair: Connected AF_UNIX socket for TCP_REPAIR helper
+ * @our_tap_mac: Pasta/passt's MAC on the tap link
+ * @guest_mac: MAC address of guest or namespace, seen or configured
* @hash_secret: 128-bit secret for siphash functions
- * @ifi4: Index of template interface for IPv4, 0 if IPv4 disabled
+ * @ifi4: Template interface for IPv4, -1: none, 0: IPv4 disabled
* @ip: IPv4 configuration
* @dns_search: DNS search list
- * @ifi6: Index of template interface for IPv6, 0 if IPv6 disabled
+ * @hostname: Guest hostname
+ * @fqdn: Guest FQDN
+ * @ifi6: Template interface for IPv6, -1: none, 0: IPv6 disabled
* @ip6: IPv6 configuration
* @pasta_ifn: Name of namespace interface for pasta
* @pasta_ifi: Index of namespace interface for pasta
* @pasta_conf_ns: Configure namespace after creating it
- * @no_copy_routes: Don't copy all routes when configuring target namespace
- * @no_copy_addrs: Don't copy all addresses when configuring namespace
* @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler
* @no_udp: Disable UDP operation
@@ -240,9 +232,15 @@ struct ip6_ctx {
* @no_dhcpv6: Disable DHCPv6 server
* @no_ndp: Disable NDP handler altogether
* @no_ra: Disable router advertisements
- * @no_map_gw: Don't map connections, untracked UDP to gateway to host
+ * @no_splice: Disable socket splicing for inbound traffic
+ * @host_lo_to_ns_lo: Map host loopback addresses to ns loopback addresses
+ * @freebind: Allow binding of non-local addresses for forwarding
* @low_wmem: Low probed net.core.wmem_max
* @low_rmem: Low probed net.core.rmem_max
+ * @vdev: vhost-user device
+ * @device_state_fd: Device state migration channel
+ * @device_state_result: Device state migration result
+ * @migrate_target: Are we the target, on the next migration request?
*/
struct ctx {
enum passt_modes mode;
@@ -250,11 +248,14 @@ struct ctx {
int trace;
int quiet;
int foreground;
- int force_stderr;
int nofile;
char sock_path[UNIX_PATH_MAX];
+ char repair_path[UNIX_PATH_MAX];
char pcap[PATH_MAX];
- char pid_file[PATH_MAX];
+
+ char pidfile[PATH_MAX];
+ int pidfile_fd;
+
int one_off;
int pasta_netns_fd;
@@ -266,23 +267,28 @@ struct ctx {
int epollfd;
int fd_tap_listen;
int fd_tap;
- unsigned char mac[ETH_ALEN];
- unsigned char mac_guest[ETH_ALEN];
+ int fd_repair_listen;
+ int fd_repair;
+ unsigned char our_tap_mac[ETH_ALEN];
+ unsigned char guest_mac[ETH_ALEN];
+ uint16_t mtu;
+
uint64_t hash_secret[2];
- unsigned int ifi4;
+ int ifi4;
struct ip4_ctx ip4;
struct fqdn dns_search[MAXDNSRCH];
- unsigned int ifi6;
+ char hostname[PASST_MAXDNAME];
+ char fqdn[PASST_MAXDNAME];
+
+ int ifi6;
struct ip6_ctx ip6;
char pasta_ifn[IF_NAMESIZE];
unsigned int pasta_ifi;
int pasta_conf_ns;
- int no_copy_routes;
- int no_copy_addrs;
int no_tcp;
struct tcp_ctx tcp;
@@ -291,7 +297,6 @@ struct ctx {
int no_icmp;
struct icmp_ctx icmp;
- int mtu;
int no_dns;
int no_dns_search;
int no_dhcp_dns;
@@ -300,10 +305,19 @@ struct ctx {
int no_dhcpv6;
int no_ndp;
int no_ra;
- int no_map_gw;
+ int no_splice;
+ int host_lo_to_ns_lo;
+ int freebind;
int low_wmem;
int low_rmem;
+
+ struct vu_dev *vdev;
+
+ /* Migration */
+ int device_state_fd;
+ int device_state_result;
+ bool migrate_target;
};
void proto_update_l2_buf(const unsigned char *eth_d,