diff options
Diffstat (limited to 'udp.c')
| -rw-r--r-- | udp.c | 232 |
1 files changed, 83 insertions, 149 deletions
@@ -26,7 +26,10 @@ * * We track pseudo-connections of this type as flow table entries of type * FLOW_UDP. We store the time of the last traffic on the flow in uflow->ts, - * and let the flow expire if there is no traffic for UDP_CONN_TIMEOUT seconds. + * and let the flow expire if there is no traffic for UDP_TIMEOUT seconds for + * unidirectional flows and flows with only one datagram and one reply, or + * UDP_TIMEOUT_STREAM seconds for bidirectional flows with more than one + * datagram on either side. * * NOTE: This won't handle multicast protocols, or some protocols with different * port usage. We'll need specific logic if we want to handle those. @@ -118,16 +121,19 @@ #define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ +#define UDP_TIMEOUT "/proc/sys/net/netfilter/nf_conntrack_udp_timeout" +#define UDP_TIMEOUT_STREAM \ + "/proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream" + +#define UDP_TIMEOUT_DEFAULT 30 /* s */ +#define UDP_TIMEOUT_STREAM_DEFAULT 120 /* s */ + /* Maximum UDP data to be returned in ICMP messages */ #define ICMP4_MAX_DLEN 8 #define ICMP6_MAX_DLEN (IPV6_MIN_MTU \ - sizeof(struct udphdr) \ - sizeof(struct ipv6hdr)) -/* "Spliced" sockets indexed by bound port (host order) */ -static int udp_splice_ns [IP_VERSIONS][NUM_PORTS]; -static int udp_splice_init[IP_VERSIONS][NUM_PORTS]; - /* Static buffers */ /* UDP header and data for inbound messages */ @@ -164,17 +170,19 @@ udp_meta[UDP_MAX_FRAMES]; /** * enum udp_iov_idx - Indices for the buffers making up a single UDP frame - * @UDP_IOV_TAP tap specific header - * @UDP_IOV_ETH Ethernet header - * @UDP_IOV_IP IP (v4/v6) header - * @UDP_IOV_PAYLOAD IP payload (UDP header + data) - * @UDP_NUM_IOVS the number of entries in the iovec array + * @UDP_IOV_TAP tap specific header + * @UDP_IOV_ETH Ethernet header + * @UDP_IOV_IP IP (v4/v6) header + * @UDP_IOV_PAYLOAD IP payload (UDP header + data) + * @UDP_IOV_ETH_PAD Ethernet (802.3) padding to 60 bytes + * @UDP_NUM_IOVS the number of entries in the iovec array */ enum udp_iov_idx { UDP_IOV_TAP, UDP_IOV_ETH, UDP_IOV_IP, UDP_IOV_PAYLOAD, + UDP_IOV_ETH_PAD, UDP_NUM_IOVS, }; @@ -192,19 +200,6 @@ static struct mmsghdr udp_mh_splice [UDP_MAX_FRAMES]; static struct iovec udp_l2_iov [UDP_MAX_FRAMES][UDP_NUM_IOVS]; /** - * udp_portmap_clear() - Clear UDP port map before configuration - */ -void udp_portmap_clear(void) -{ - unsigned i; - - for (i = 0; i < NUM_PORTS; i++) { - udp_splice_ns[V4][i] = udp_splice_ns[V6][i] = -1; - udp_splice_init[V4][i] = udp_splice_init[V6][i] = -1; - } -} - -/** * udp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses * @eth_d: Ethernet destination address, NULL if unchanged */ @@ -239,6 +234,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]); tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph); tiov[UDP_IOV_PAYLOAD].iov_base = payload; + tiov[UDP_IOV_ETH_PAD].iov_base = eth_pad; mh->msg_iov = siov; mh->msg_iovlen = 1; @@ -345,6 +341,22 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, } /** + * udp_tap_pad() - Calculate padding to send out of padding (zero) buffer + * @iov: Pointer to iovec of frame parts we're about to send + */ +static void udp_tap_pad(struct iovec *iov) +{ + size_t l2len = iov[UDP_IOV_ETH].iov_len + + iov[UDP_IOV_IP].iov_len + + iov[UDP_IOV_PAYLOAD].iov_len; + + if (l2len < ETH_ZLEN) + iov[UDP_IOV_ETH_PAD].iov_len = ETH_ZLEN - l2len; + else + iov[UDP_IOV_ETH_PAD].iov_len = 0; +} + +/** * udp_tap_prepare() - Convert one datagram into a tap frame * @mmh: Receiving mmsghdr array * @idx: Index of the datagram to prepare @@ -362,23 +374,31 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base; struct udp_payload_t *bp = &udp_payload[idx]; struct udp_meta_t *bm = &udp_meta[idx]; - size_t l4len; + size_t l4len, l2len; eth_update_mac(eh, NULL, tap_omac); if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) { l4len = udp_update_hdr6(&bm->ip6h, bp, toside, mmh[idx].msg_len, no_udp_csum); - tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + ETH_HLEN); + + l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN); + tap_hdr_update(&bm->taph, l2len); + eh->h_proto = htons_constant(ETH_P_IPV6); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h); } else { l4len = udp_update_hdr4(&bm->ip4h, bp, toside, mmh[idx].msg_len, no_udp_csum); - tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + ETH_HLEN); + + l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN); + tap_hdr_update(&bm->taph, l2len); + eh->h_proto = htons_constant(ETH_P_IP); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h); } (*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len; + + udp_tap_pad(*tap_iov); } /** @@ -828,12 +848,13 @@ static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n, * udp_sock_fwd() - Forward datagrams from a possibly unconnected socket * @c: Execution context * @s: Socket to forward from + * @rule_hint: Forwarding rule to use, or -1 if unknown * @frompif: Interface to which @s belongs * @port: Our (local) port number of @s * @now: Current timestamp */ -void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif, - in_port_t port, const struct timespec *now) +void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, + uint8_t frompif, in_port_t port, const struct timespec *now) { union sockaddr_inany src; union inany_addr dst; @@ -858,7 +879,8 @@ void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif, continue; } - tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src, now); + tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src, + rule_hint, now); topif = pif_at_sidx(tosidx); if (pif_is_socket(topif)) { @@ -900,8 +922,10 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now) { - if (events & (EPOLLERR | EPOLLIN)) - udp_sock_fwd(c, ref.fd, ref.udp.pif, ref.udp.port, now); + if (events & (EPOLLERR | EPOLLIN)) { + udp_sock_fwd(c, ref.fd, ref.listen.rule, + ref.listen.pif, ref.listen.port, now); + } } /** @@ -939,7 +963,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref, int s = ref.fd; flow_trace(uflow, "Received data on reply socket"); - uflow->ts = now->tv_sec; + udp_flow_activity(uflow, !tosidx.sidei, now); if (pif_is_socket(topif)) { udp_sock_to_sock(c, ref.fd, n, tosidx); @@ -1102,64 +1126,41 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, } /** - * udp_sock_init() - Initialise listening socket for a given port + * udp_listen() - Initialise listening socket for a given port * @c: Execution context * @pif: Interface to open the socket for (PIF_HOST or PIF_SPLICE) + * @rule: Index of relevant forwarding rule * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order * - * Return: 0 on success, negative error code on failure + * Return: socket fd on success, negative error code on failure */ -int udp_sock_init(const struct ctx *c, uint8_t pif, - const union inany_addr *addr, const char *ifname, - in_port_t port) +int udp_listen(const struct ctx *c, uint8_t pif, unsigned rule, + const union inany_addr *addr, const char *ifname, in_port_t port) { - union udp_listen_epoll_ref uref = { - .pif = pif, - .port = port, - }; - int (*socks)[NUM_PORTS]; int s; ASSERT(!c->no_udp); - ASSERT(pif_is_socket(pif)); - - if (pif == PIF_HOST) - socks = udp_splice_init; - else - socks = udp_splice_ns; if (!c->ifi4) { if (!addr) /* Restrict to v6 only */ addr = &inany_any6; else if (inany_v4(addr)) - /* Nothing to do */ - return 0; + return -EAFNOSUPPORT; } if (!c->ifi6) { if (!addr) /* Restrict to v4 only */ addr = &inany_any4; else if (!inany_v4(addr)) - /* Nothing to do */ - return 0; - } - - s = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, pif, - addr, ifname, port, uref.u32); - if (s > FD_REF_MAX) { - close(s); - s = -EIO; + return -EAFNOSUPPORT; } - if (!addr || inany_v4(addr)) - socks[V4][port] = s < 0 ? -1 : s; - if (!addr || !inany_v4(addr)) - socks[V6][port] = s < 0 ? -1 : s; + s = pif_listen(c, EPOLL_TYPE_UDP_LISTEN, pif, addr, ifname, port, rule); - return s < 0 ? s : 0; + return s; } /** @@ -1183,112 +1184,45 @@ static void udp_splice_iov_init(void) } /** - * udp_ns_sock_init() - Init socket to listen for spliced outbound connections + * udp_get_timeout_params() - Get host kernel UDP timeout parameters * @c: Execution context - * @port: Port, host order */ -static void udp_ns_sock_init(const struct ctx *c, in_port_t port) +static void udp_get_timeout_params(struct ctx *c) { - ASSERT(!c->no_udp); + intmax_t v; - if (!c->no_bindtodevice) { - udp_sock_init(c, PIF_SPLICE, NULL, "lo", port); - return; - } + v = read_file_integer(UDP_TIMEOUT, UDP_TIMEOUT_DEFAULT); + c->udp.timeout = v; - if (c->ifi4) - udp_sock_init(c, PIF_SPLICE, &inany_loopback4, NULL, port); - if (c->ifi6) - udp_sock_init(c, PIF_SPLICE, &inany_loopback6, NULL, port); -} + v = read_file_integer(UDP_TIMEOUT_STREAM, UDP_TIMEOUT_STREAM_DEFAULT); + c->udp.stream_timeout = v; -/** - * udp_port_rebind() - Rebind ports to match forward maps - * @c: Execution context - * @outbound: True to remap outbound forwards, otherwise inbound - * - * Must be called in namespace context if @outbound is true. - */ -static void udp_port_rebind(struct ctx *c, bool outbound) -{ - int (*socks)[NUM_PORTS] = outbound ? udp_splice_ns : udp_splice_init; - const uint8_t *fmap - = outbound ? c->udp.fwd_out.map : c->udp.fwd_in.map; - unsigned port; - - for (port = 0; port < NUM_PORTS; port++) { - if (!bitmap_isset(fmap, port)) { - if (socks[V4][port] >= 0) { - close(socks[V4][port]); - socks[V4][port] = -1; - } - - if (socks[V6][port] >= 0) { - close(socks[V6][port]); - socks[V6][port] = -1; - } - - continue; - } - - if ((c->ifi4 && socks[V4][port] == -1) || - (c->ifi6 && socks[V6][port] == -1)) { - if (outbound) - udp_ns_sock_init(c, port); - else - udp_sock_init(c, PIF_HOST, NULL, NULL, port); - } - } -} - -/** - * udp_port_rebind_outbound() - Rebind ports in namespace - * @arg: Execution context - * - * Called with NS_CALL() - * - * Return: 0 - */ -static int udp_port_rebind_outbound(void *arg) -{ - struct ctx *c = (struct ctx *)arg; - - ns_enter(c); - udp_port_rebind(c, true); - - return 0; -} - -/** - * udp_port_rebind_all() - Rebind ports to match forward maps (in host & ns) - * @c: Execution context - */ -void udp_port_rebind_all(struct ctx *c) -{ - ASSERT(c->mode == MODE_PASTA && !c->no_udp); - - if (c->udp.fwd_out.mode == FWD_AUTO) - NS_CALL(udp_port_rebind_outbound, c); - - if (c->udp.fwd_in.mode == FWD_AUTO) - udp_port_rebind(c, false); + debug("Using UDP timeout parameters, timeout: %d, stream_timeout: %d", + c->udp.timeout, c->udp.stream_timeout); } /** * udp_init() - Initialise per-socket data, and sockets in namespace * @c: Execution context * - * Return: 0 + * Return: 0 on success, -1 on failure */ int udp_init(struct ctx *c) { ASSERT(!c->no_udp); + udp_get_timeout_params(c); + udp_iov_init(c); + if (fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP) < 0) + return -1; + if (c->mode == MODE_PASTA) { udp_splice_iov_init(); - NS_CALL(udp_port_rebind_outbound, c); + if (fwd_listen_sync(c, &c->udp.fwd_out, + PIF_SPLICE, IPPROTO_UDP) < 0) + return -1; } return 0; |
