aboutgitcodebugslistschat
path: root/udp.c
diff options
context:
space:
mode:
Diffstat (limited to 'udp.c')
-rw-r--r--udp.c232
1 files changed, 83 insertions, 149 deletions
diff --git a/udp.c b/udp.c
index 4b625b7..464aa09 100644
--- a/udp.c
+++ b/udp.c
@@ -26,7 +26,10 @@
*
* We track pseudo-connections of this type as flow table entries of type
* FLOW_UDP. We store the time of the last traffic on the flow in uflow->ts,
- * and let the flow expire if there is no traffic for UDP_CONN_TIMEOUT seconds.
+ * and let the flow expire if there is no traffic for the timeout read from
+ * UDP_TIMEOUT (unidirectional flows, and flows with only one datagram and one
+ * reply), or from UDP_TIMEOUT_STREAM (bidirectional flows with more than one
+ * datagram on either side). Both are in seconds, with built-in defaults.
*
* NOTE: This won't handle multicast protocols, or some protocols with different
* port usage. We'll need specific logic if we want to handle those.
@@ -118,16 +121,19 @@
#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
+#define UDP_TIMEOUT "/proc/sys/net/netfilter/nf_conntrack_udp_timeout"
+#define UDP_TIMEOUT_STREAM \
+ "/proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream"
+
+#define UDP_TIMEOUT_DEFAULT 30 /* s */
+#define UDP_TIMEOUT_STREAM_DEFAULT 120 /* s */
+
/* Maximum UDP data to be returned in ICMP messages */
#define ICMP4_MAX_DLEN 8
#define ICMP6_MAX_DLEN (IPV6_MIN_MTU \
- sizeof(struct udphdr) \
- sizeof(struct ipv6hdr))
-/* "Spliced" sockets indexed by bound port (host order) */
-static int udp_splice_ns [IP_VERSIONS][NUM_PORTS];
-static int udp_splice_init[IP_VERSIONS][NUM_PORTS];
-
/* Static buffers */
/* UDP header and data for inbound messages */
@@ -164,17 +170,19 @@ udp_meta[UDP_MAX_FRAMES];
/**
* enum udp_iov_idx - Indices for the buffers making up a single UDP frame
- * @UDP_IOV_TAP tap specific header
- * @UDP_IOV_ETH Ethernet header
- * @UDP_IOV_IP IP (v4/v6) header
- * @UDP_IOV_PAYLOAD IP payload (UDP header + data)
- * @UDP_NUM_IOVS the number of entries in the iovec array
+ * @UDP_IOV_TAP tap specific header
+ * @UDP_IOV_ETH Ethernet header
+ * @UDP_IOV_IP IP (v4/v6) header
+ * @UDP_IOV_PAYLOAD IP payload (UDP header + data)
+ * @UDP_IOV_ETH_PAD Ethernet (802.3) padding to 60 bytes
+ * @UDP_NUM_IOVS the number of entries in the iovec array
*/
enum udp_iov_idx {
UDP_IOV_TAP,
UDP_IOV_ETH,
UDP_IOV_IP,
UDP_IOV_PAYLOAD,
+ UDP_IOV_ETH_PAD,
UDP_NUM_IOVS,
};
@@ -192,19 +200,6 @@ static struct mmsghdr udp_mh_splice [UDP_MAX_FRAMES];
static struct iovec udp_l2_iov [UDP_MAX_FRAMES][UDP_NUM_IOVS];
/**
- * udp_portmap_clear() - Clear UDP port map before configuration
- */
-void udp_portmap_clear(void)
-{
- unsigned i;
-
- for (i = 0; i < NUM_PORTS; i++) {
- udp_splice_ns[V4][i] = udp_splice_ns[V6][i] = -1;
- udp_splice_init[V4][i] = udp_splice_init[V6][i] = -1;
- }
-}
-
-/**
* udp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses
* @eth_d: Ethernet destination address, NULL if unchanged
*/
@@ -239,6 +234,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]);
tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
tiov[UDP_IOV_PAYLOAD].iov_base = payload;
+ tiov[UDP_IOV_ETH_PAD].iov_base = eth_pad;
mh->msg_iov = siov;
mh->msg_iovlen = 1;
@@ -345,6 +341,22 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
}
/**
+ * udp_tap_pad() - Set length of padding to send from padding (zero) buffer
+ * @iov: Pointer to iovec of frame parts we're about to send
+ */
+static void udp_tap_pad(struct iovec *iov)
+{
+ size_t l2len = iov[UDP_IOV_ETH].iov_len +
+ iov[UDP_IOV_IP].iov_len +
+ iov[UDP_IOV_PAYLOAD].iov_len;
+
+ if (l2len < ETH_ZLEN)
+ iov[UDP_IOV_ETH_PAD].iov_len = ETH_ZLEN - l2len;
+ else
+ iov[UDP_IOV_ETH_PAD].iov_len = 0;
+}
+
+/**
* udp_tap_prepare() - Convert one datagram into a tap frame
* @mmh: Receiving mmsghdr array
* @idx: Index of the datagram to prepare
@@ -362,23 +374,31 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
struct udp_payload_t *bp = &udp_payload[idx];
struct udp_meta_t *bm = &udp_meta[idx];
- size_t l4len;
+ size_t l4len, l2len;
eth_update_mac(eh, NULL, tap_omac);
if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
mmh[idx].msg_len, no_udp_csum);
- tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + ETH_HLEN);
+
+ l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN);
+ tap_hdr_update(&bm->taph, l2len);
+
eh->h_proto = htons_constant(ETH_P_IPV6);
(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
} else {
l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
mmh[idx].msg_len, no_udp_csum);
- tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + ETH_HLEN);
+
+ l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN);
+ tap_hdr_update(&bm->taph, l2len);
+
eh->h_proto = htons_constant(ETH_P_IP);
(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
}
(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
+
+ udp_tap_pad(*tap_iov);
}
/**
@@ -828,12 +848,13 @@ static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n,
* udp_sock_fwd() - Forward datagrams from a possibly unconnected socket
* @c: Execution context
* @s: Socket to forward from
+ * @rule_hint: Forwarding rule to use, or -1 if unknown
* @frompif: Interface to which @s belongs
* @port: Our (local) port number of @s
* @now: Current timestamp
*/
-void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
- in_port_t port, const struct timespec *now)
+void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
+ uint8_t frompif, in_port_t port, const struct timespec *now)
{
union sockaddr_inany src;
union inany_addr dst;
@@ -858,7 +879,8 @@ void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
continue;
}
- tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src, now);
+ tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src,
+ rule_hint, now);
topif = pif_at_sidx(tosidx);
if (pif_is_socket(topif)) {
@@ -900,8 +922,10 @@ void udp_listen_sock_handler(const struct ctx *c,
union epoll_ref ref, uint32_t events,
const struct timespec *now)
{
- if (events & (EPOLLERR | EPOLLIN))
- udp_sock_fwd(c, ref.fd, ref.udp.pif, ref.udp.port, now);
+ if (events & (EPOLLERR | EPOLLIN)) {
+ udp_sock_fwd(c, ref.fd, ref.listen.rule,
+ ref.listen.pif, ref.listen.port, now);
+ }
}
/**
@@ -939,7 +963,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
int s = ref.fd;
flow_trace(uflow, "Received data on reply socket");
- uflow->ts = now->tv_sec;
+ udp_flow_activity(uflow, !tosidx.sidei, now);
if (pif_is_socket(topif)) {
udp_sock_to_sock(c, ref.fd, n, tosidx);
@@ -1102,64 +1126,41 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
}
/**
- * udp_sock_init() - Initialise listening socket for a given port
+ * udp_listen() - Initialise listening socket for a given port
* @c: Execution context
* @pif: Interface to open the socket for (PIF_HOST or PIF_SPLICE)
+ * @rule: Index of relevant forwarding rule
* @addr: Pointer to address for binding, NULL if not configured
* @ifname: Name of interface to bind to, NULL if not configured
* @port: Port, host order
*
- * Return: 0 on success, negative error code on failure
+ * Return: socket fd on success, negative error code on failure
*/
-int udp_sock_init(const struct ctx *c, uint8_t pif,
- const union inany_addr *addr, const char *ifname,
- in_port_t port)
+int udp_listen(const struct ctx *c, uint8_t pif, unsigned rule,
+ const union inany_addr *addr, const char *ifname, in_port_t port)
{
- union udp_listen_epoll_ref uref = {
- .pif = pif,
- .port = port,
- };
- int (*socks)[NUM_PORTS];
int s;
ASSERT(!c->no_udp);
- ASSERT(pif_is_socket(pif));
-
- if (pif == PIF_HOST)
- socks = udp_splice_init;
- else
- socks = udp_splice_ns;
if (!c->ifi4) {
if (!addr)
/* Restrict to v6 only */
addr = &inany_any6;
else if (inany_v4(addr))
- /* Nothing to do */
- return 0;
+ return -EAFNOSUPPORT;
}
if (!c->ifi6) {
if (!addr)
/* Restrict to v4 only */
addr = &inany_any4;
else if (!inany_v4(addr))
- /* Nothing to do */
- return 0;
- }
-
- s = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, pif,
- addr, ifname, port, uref.u32);
- if (s > FD_REF_MAX) {
- close(s);
- s = -EIO;
+ return -EAFNOSUPPORT;
}
- if (!addr || inany_v4(addr))
- socks[V4][port] = s < 0 ? -1 : s;
- if (!addr || !inany_v4(addr))
- socks[V6][port] = s < 0 ? -1 : s;
+ s = pif_listen(c, EPOLL_TYPE_UDP_LISTEN, pif, addr, ifname, port, rule);
- return s < 0 ? s : 0;
+ return s;
}
/**
@@ -1183,112 +1184,45 @@ static void udp_splice_iov_init(void)
}
/**
- * udp_ns_sock_init() - Init socket to listen for spliced outbound connections
+ * udp_get_timeout_params() - Get host kernel UDP timeout parameters
* @c: Execution context
- * @port: Port, host order
*/
-static void udp_ns_sock_init(const struct ctx *c, in_port_t port)
+static void udp_get_timeout_params(struct ctx *c)
{
- ASSERT(!c->no_udp);
+ intmax_t v;
- if (!c->no_bindtodevice) {
- udp_sock_init(c, PIF_SPLICE, NULL, "lo", port);
- return;
- }
+ v = read_file_integer(UDP_TIMEOUT, UDP_TIMEOUT_DEFAULT);
+ c->udp.timeout = v;
- if (c->ifi4)
- udp_sock_init(c, PIF_SPLICE, &inany_loopback4, NULL, port);
- if (c->ifi6)
- udp_sock_init(c, PIF_SPLICE, &inany_loopback6, NULL, port);
-}
+ v = read_file_integer(UDP_TIMEOUT_STREAM, UDP_TIMEOUT_STREAM_DEFAULT);
+ c->udp.stream_timeout = v;
-/**
- * udp_port_rebind() - Rebind ports to match forward maps
- * @c: Execution context
- * @outbound: True to remap outbound forwards, otherwise inbound
- *
- * Must be called in namespace context if @outbound is true.
- */
-static void udp_port_rebind(struct ctx *c, bool outbound)
-{
- int (*socks)[NUM_PORTS] = outbound ? udp_splice_ns : udp_splice_init;
- const uint8_t *fmap
- = outbound ? c->udp.fwd_out.map : c->udp.fwd_in.map;
- unsigned port;
-
- for (port = 0; port < NUM_PORTS; port++) {
- if (!bitmap_isset(fmap, port)) {
- if (socks[V4][port] >= 0) {
- close(socks[V4][port]);
- socks[V4][port] = -1;
- }
-
- if (socks[V6][port] >= 0) {
- close(socks[V6][port]);
- socks[V6][port] = -1;
- }
-
- continue;
- }
-
- if ((c->ifi4 && socks[V4][port] == -1) ||
- (c->ifi6 && socks[V6][port] == -1)) {
- if (outbound)
- udp_ns_sock_init(c, port);
- else
- udp_sock_init(c, PIF_HOST, NULL, NULL, port);
- }
- }
-}
-
-/**
- * udp_port_rebind_outbound() - Rebind ports in namespace
- * @arg: Execution context
- *
- * Called with NS_CALL()
- *
- * Return: 0
- */
-static int udp_port_rebind_outbound(void *arg)
-{
- struct ctx *c = (struct ctx *)arg;
-
- ns_enter(c);
- udp_port_rebind(c, true);
-
- return 0;
-}
-
-/**
- * udp_port_rebind_all() - Rebind ports to match forward maps (in host & ns)
- * @c: Execution context
- */
-void udp_port_rebind_all(struct ctx *c)
-{
- ASSERT(c->mode == MODE_PASTA && !c->no_udp);
-
- if (c->udp.fwd_out.mode == FWD_AUTO)
- NS_CALL(udp_port_rebind_outbound, c);
-
- if (c->udp.fwd_in.mode == FWD_AUTO)
- udp_port_rebind(c, false);
+ debug("Using UDP timeout parameters, timeout: %d, stream_timeout: %d",
+ c->udp.timeout, c->udp.stream_timeout);
}
/**
* udp_init() - Initialise per-socket data, and sockets in namespace
* @c: Execution context
*
- * Return: 0
+ * Return: 0 on success, -1 on failure
*/
int udp_init(struct ctx *c)
{
ASSERT(!c->no_udp);
+ udp_get_timeout_params(c);
+
udp_iov_init(c);
+ if (fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP) < 0)
+ return -1;
+
if (c->mode == MODE_PASTA) {
udp_splice_iov_init();
- NS_CALL(udp_port_rebind_outbound, c);
+ if (fwd_listen_sync(c, &c->udp.fwd_out,
+ PIF_SPLICE, IPPROTO_UDP) < 0)
+ return -1;
}
return 0;