diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2021-04-22 13:39:36 +0200 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2021-04-22 13:39:36 +0200 |
commit | 1f7cf04d343f185f9e044fdca70a1d0252492aed (patch) | |
tree | 77b28cc46e824f9247151ca21849e74da1cc0d57 | |
parent | 5b0c88d4ef48c2c3c434929faf418f46db06d555 (diff) | |
download | passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.tar passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.tar.gz passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.tar.bz2 passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.tar.lz passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.tar.xz passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.tar.zst passt-1f7cf04d343f185f9e044fdca70a1d0252492aed.zip |
passt: Introduce packet batching mechanism
Receive packets in batches from the AF_UNIX socket, check whether they can
be sent with a single syscall and, if so, batch them up with sendmmsg().
This is a bit rudimentary and currently only implemented for UDP, but it
seems to work.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- | icmp.c | 31 | ||||
-rw-r--r-- | icmp.h | 3 | ||||
-rw-r--r-- | passt.c | 284 | ||||
-rw-r--r-- | passt.h | 14 | ||||
-rw-r--r-- | tcp.c | 44 | ||||
-rw-r--r-- | tcp.h | 3 | ||||
-rw-r--r-- | udp.c | 69 | ||||
-rw-r--r-- | udp.h | 3 |
8 files changed, 333 insertions, 118 deletions
@@ -77,45 +77,54 @@ void icmp_sock_handler(struct ctx *c, int s, uint32_t events) * icmp_tap_handler() - Handle packets from tap * @c: Execution context * @af: Address family, AF_INET or AF_INET6 - * @in: Input buffer - * @len: Length, including UDP header + * @msg: Input message + * @count: Message count (always 1 for ICMP) + * + * Return: count of consumed packets (always 1, even if malformed) */ -void icmp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) +int icmp_tap_handler(struct ctx *c, int af, void *addr, + struct tap_msg *msg, int count) { + (void)count; + if (af == AF_INET) { - struct icmphdr *ih = (struct icmphdr *)in; + struct icmphdr *ih = (struct icmphdr *)msg[0].l4h; struct sockaddr_in sa = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), }; - if (len < sizeof(*ih) || ih->type != ICMP_ECHO) - return; + if (msg[0].l4_len < sizeof(*ih) || ih->type != ICMP_ECHO) + return 1; sa.sin_port = ih->un.echo.id; bind(c->icmp.s4, (struct sockaddr *)&sa, sizeof(sa)); sa.sin_addr = *(struct in_addr *)addr; - sendto(c->icmp.s4, in, len, MSG_DONTWAIT, + sendto(c->icmp.s4, msg[0].l4h, msg[0].l4_len, + MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *)&sa, sizeof(sa)); } else if (af == AF_INET6) { struct sockaddr_in6 sa = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, }; - struct icmp6hdr *ih = (struct icmp6hdr *)in; + struct icmp6hdr *ih = (struct icmp6hdr *)msg[0].l4h; - if (len < sizeof(*ih) || + if (msg[0].l4_len < sizeof(*ih) || (ih->icmp6_type != 128 && ih->icmp6_type != 129)) - return; + return 1; sa.sin6_port = ih->icmp6_identifier; bind(c->icmp.s6, (struct sockaddr *)&sa, sizeof(sa)); sa.sin6_addr = *(struct in6_addr *)addr; - sendto(c->icmp.s6, in, len, MSG_DONTWAIT | MSG_NOSIGNAL, + sendto(c->icmp.s6, msg[0].l4h, msg[0].l4_len, + MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *)&sa, sizeof(sa)); } + + return 1; } /** @@ -4,7 +4,8 @@ struct ctx; void icmp_sock_handler(struct ctx *c, int s, uint32_t events); -void 
icmp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len); +int icmp_tap_handler(struct ctx *c, int af, void *addr, + struct tap_msg *msg, int count); int icmp_sock_init(struct ctx *c); /** @@ -19,6 +19,7 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/uio.h> #include <sys/un.h> #include <ifaddrs.h> #include <linux/if_ether.h> @@ -56,7 +57,9 @@ #define EPOLL_EVENTS 10 -#define TIMER_INTERVAL 20 /* ms, for protocol periodic handlers */ +#define TAP_NMSG 32 /* maximum messages to buffer from tap */ + +#define TIMER_INTERVAL 100 /* ms, for protocol periodic handlers */ /** * sock_unix() - Create and bind AF_UNIX socket, add to epoll list @@ -303,87 +306,182 @@ static void get_dns(struct ctx *c) /** * tap4_handler() - IPv4 and ARP packet handler for tap file descriptor * @c: Execution context - * @len: Total L2 packet length - * @in: Packet buffer, L2 headers + * @msg: Array of messages with the same L3 protocol + * @count: Count of messages with the same L3 protocol + * + * Return: count of packets consumed by handlers */ -static void tap4_handler(struct ctx *c, char *in, size_t len) +static int tap4_handler(struct ctx *c, struct tap_msg *msg, size_t count) { char buf_s[INET_ADDRSTRLEN] __attribute((__unused__)); char buf_d[INET_ADDRSTRLEN] __attribute((__unused__)); - struct ethhdr *eh = (struct ethhdr *)in; - struct iphdr *iph = (struct iphdr *)(eh + 1); + struct ethhdr *eh = (struct ethhdr *)msg[0].start; + struct iphdr *iph, *prev_iph = NULL; + struct udphdr *uh, *prev_uh = NULL; + size_t len = msg[0].len; + unsigned int i; char *l4h; if (!c->v4) - return; + return count; + + if (len < sizeof(*eh) + sizeof(*iph)) + return 1; if (arp(c, eh, len) || dhcp(c, eh, len)) - return; + return 1; - if (len < sizeof(*eh) + sizeof(*iph)) - return; + for (i = 0; i < count; i++) { + len = msg[i].len; + if (len < sizeof(*eh) + sizeof(*iph)) + return 1; - l4h = (char *)iph + iph->ihl * 4; - len -= (intptr_t)l4h - 
(intptr_t)eh; + eh = (struct ethhdr *)msg[i].start; + iph = (struct iphdr *)(eh + 1); + l4h = (char *)iph + iph->ihl * 4; - if (iph->protocol == IPPROTO_ICMP) { - debug("icmp from tap: %s -> %s", - inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)), - inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d))); - } else if (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP || - iph->protocol == IPPROTO_SCTP) { - struct tcphdr *th = (struct tcphdr *)l4h; + msg[i].l4h = l4h; + msg[i].l4_len = len - ((intptr_t)l4h - (intptr_t)eh); + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + break; + + if (len < sizeof(*uh)) + break; + + uh = (struct udphdr *)l4h; + + if (!i) { + prev_iph = iph; + prev_uh = uh; + continue; + } - if (len < sizeof(*th) && len < sizeof(struct udphdr)) - return; + if (iph->tos != prev_iph->tos || + iph->frag_off != prev_iph->frag_off || + iph->protocol != prev_iph->protocol || + iph->saddr != prev_iph->saddr || + iph->daddr != prev_iph->daddr || + uh->source != prev_uh->source || + uh->dest != prev_uh->dest) + break; + + prev_iph = iph; + prev_uh = uh; + } - debug("%s from tap: %s:%i -> %s:%i", + eh = (struct ethhdr *)msg[0].start; + iph = (struct iphdr *)(eh + 1); + + if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_SCTP) { + uh = (struct udphdr *)msg[0].l4h; + + if (msg[0].len < sizeof(*uh)) + return 1; + + debug("%s from tap: %s:%i -> %s:%i (%i packet%s)", getprotobynumber(iph->protocol)->p_name, inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)), - ntohs(th->source), + ntohs(uh->source), inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)), - ntohs(th->dest)); + ntohs(uh->dest), + i, i > 1 ? 
"s" : ""); + } else if (iph->protocol == IPPROTO_ICMP) { + debug("icmp from tap: %s -> %s", + inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)), + inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d))); } if (iph->protocol == IPPROTO_TCP) - tcp_tap_handler(c, AF_INET, &iph->daddr, l4h, len); - else if (iph->protocol == IPPROTO_UDP) - udp_tap_handler(c, AF_INET, &iph->daddr, l4h, len); - else if (iph->protocol == IPPROTO_ICMP) - icmp_tap_handler(c, AF_INET, &iph->daddr, l4h, len); + return tcp_tap_handler(c, AF_INET, &iph->daddr, msg, i); + + if (iph->protocol == IPPROTO_UDP) + return udp_tap_handler(c, AF_INET, &iph->daddr, msg, i); + + if (iph->protocol == IPPROTO_ICMP) + icmp_tap_handler(c, AF_INET, &iph->daddr, msg, 1); + + return 1; } /** * tap6_handler() - IPv6 packet handler for tap file descriptor * @c: Execution context - * @len: Total L2 packet length - * @in: Packet buffer, L2 headers + * @msg: Array of messages with the same L3 protocol + * @count: Count of messages with the same L3 protocol */ -static void tap6_handler(struct ctx *c, char *in, size_t len) +static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count) { char buf_s[INET6_ADDRSTRLEN] __attribute((__unused__)); char buf_d[INET6_ADDRSTRLEN] __attribute((__unused__)); - struct ethhdr *eh = (struct ethhdr *)in; - struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1); - uint8_t proto; + struct ethhdr *eh = (struct ethhdr *)msg[0].start; + struct udphdr *uh, *prev_uh = NULL; + uint8_t proto = 0, prev_proto = 0; + size_t len = msg[0].len; + struct ipv6hdr *ip6h; + unsigned int i; char *l4h; if (!c->v6) - return; + return count; if (len < sizeof(*eh) + sizeof(*ip6h)) - return; + return 1; if (ndp(c, eh, len) || dhcpv6(c, eh, len)) - return; + return 1; - l4h = ipv6_l4hdr(ip6h, &proto); + for (i = 0; i < count; i++) { + struct ipv6hdr *p_ip6h; - c->addr6_guest = ip6h->saddr; - ip6h->saddr = c->addr6; + len = msg[i].len; + if (len < sizeof(*eh) + sizeof(*ip6h)) + return 1; - len -= 
(intptr_t)l4h - (intptr_t)eh; + eh = (struct ethhdr *)msg[i].start; + ip6h = (struct ipv6hdr *)(eh + 1); + l4h = ipv6_l4hdr(ip6h, &proto); + + msg[i].l4h = l4h; + msg[i].l4_len = len - ((intptr_t)l4h - (intptr_t)eh); + + c->addr6_guest = ip6h->saddr; + ip6h->saddr = c->addr6; + + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) + break; + + if (len < sizeof(*uh)) + break; + + uh = (struct udphdr *)l4h; + + if (!i) { + p_ip6h = ip6h; + prev_proto = proto; + prev_uh = uh; + continue; + } + + if (proto != prev_proto || + memcmp(&ip6h->saddr, &p_ip6h->saddr, sizeof(ip6h->saddr)) || + memcmp(&ip6h->daddr, &p_ip6h->daddr, sizeof(ip6h->daddr)) || + uh->source != prev_uh->source || + uh->dest != prev_uh->dest) + break; + + p_ip6h = ip6h; + prev_proto = proto; + prev_uh = uh; + } + + if (prev_proto) + proto = prev_proto; + + eh = (struct ethhdr *)msg[0].start; + ip6h = (struct ipv6hdr *)(eh + 1); if (proto == IPPROTO_ICMPV6) { debug("icmpv6 from tap: %s ->\n\t%s", @@ -391,27 +489,34 @@ static void tap6_handler(struct ctx *c, char *in, size_t len) inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d))); } else if (proto == IPPROTO_TCP || proto == IPPROTO_UDP || proto == IPPROTO_SCTP) { - struct tcphdr *th = (struct tcphdr *)l4h; + uh = (struct udphdr *)msg[0].l4h; - if (len < sizeof(*th) && len < sizeof(struct udphdr)) - return; + if (msg[0].len < sizeof(*uh)) + return 1; - debug("%s from tap: [%s]:%i\n\t-> [%s]:%i", + debug("%s from tap: [%s]:%i\n\t-> [%s]:%i (%i packet%s)", getprotobynumber(proto)->p_name, inet_ntop(AF_INET6, &ip6h->saddr, buf_s, sizeof(buf_s)), - ntohs(th->source), + ntohs(uh->source), inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)), - ntohs(th->dest)); + ntohs(uh->dest), + i, i > 1 ? 
"s" : ""); } if (proto == IPPROTO_TCP) - tcp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len); - else if (proto == IPPROTO_UDP) - udp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len); - else if (proto == IPPROTO_ICMPV6) - icmp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len); + return tcp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i); + + if (proto == IPPROTO_UDP) + return udp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i); + + if (proto == IPPROTO_ICMPV6) + icmp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, 1); + + return 1; } +static char tap_buf[ETH_MAX_MTU * TAP_NMSG]; + /** * tap_handler() - Packet handler for tap file descriptor * @c: Execution context @@ -420,33 +525,74 @@ static void tap6_handler(struct ctx *c, char *in, size_t len) */ static int tap_handler(struct ctx *c) { - char buf[ETH_MAX_MTU]; + int msg_count = 0, same, rcv = 0, i = 0; + struct tap_msg msg[UIO_MAXIOV]; + ssize_t n, rem, fill; struct ethhdr *eh; - uint32_t vnet_len; - ssize_t n; + char *p = tap_buf; - eh = (struct ethhdr *)buf; + fill = ETH_MAX_MTU * (TAP_NMSG - 1); - while ((n = recv(c->fd_unix, &vnet_len, 4, MSG_DONTWAIT)) == 4) { - n = recv(c->fd_unix, buf, ntohl(vnet_len), MSG_DONTWAIT); + while ((n = recv(c->fd_unix, p, fill, MSG_DONTWAIT)) > 0) { + fill -= n; + while (n > 0) { + ssize_t len = ntohl(*(uint32_t *)p); - if (n < (ssize_t)sizeof(*eh)) - break; + p += sizeof(uint32_t); + n -= sizeof(uint32_t); - switch (ntohs(eh->h_proto)) { - case ETH_P_IP: + if (len < (ssize_t)sizeof(*eh)) + break; + + /* At most one packet might not fit in a single read */ + if (len > n) { + rem = recv(c->fd_unix, p + n, fill, + MSG_DONTWAIT); + rcv = errno; + if (rem <= 0 || rem + n != len) + break; + } + + msg[msg_count].start = p; + msg[msg_count++].len = len; + + n -= len; + p += len; + } + } + + rcv = errno; + + while (i < msg_count) { + eh = (struct ethhdr *)msg[i].start; + switch (ntohs(eh->h_proto)) { case ETH_P_ARP: - tap4_handler(c, buf, n); + tap4_handler(c, msg + i, 1); + i++; + break; + 
case ETH_P_IP: + for (same = 1; i + same < msg_count; same++) { + eh = (struct ethhdr *)msg[i + same].start; + if (ntohs(eh->h_proto) != ETH_P_IP) + break; + } + i += tap4_handler(c, msg + i, same); break; case ETH_P_IPV6: - tap6_handler(c, buf, n); + for (same = 1; i + same < msg_count; same++) { + eh = (struct ethhdr *)msg[i + same].start; + if (ntohs(eh->h_proto) != ETH_P_IPV6) + break; + } + i += tap6_handler(c, msg + i, same); break; default: + i++; break; } } - if (n >= 0 || errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) + if (n >= 0 || rcv == EINTR || rcv == EAGAIN || rcv == EWOULDBLOCK) return 0; epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_unix, NULL); @@ -552,7 +698,7 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } -#if DEBUG +#if DEBUG || 1 openlog("passt", LOG_PERROR, LOG_DAEMON); #else openlog("passt", 0, LOG_DAEMON); @@ -610,7 +756,7 @@ listen: UNIX_SOCK_PATH " -net nic,model=virtio"); c.fd_unix = accept(fd_unix, NULL, NULL); - ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP | EPOLLERR | EPOLLHUP; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP; ev.data.fd = c.fd_unix; epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev); @@ -1,5 +1,19 @@ #define UNIX_SOCK_PATH "/tmp/passt.socket" +/** + * struct tap_msg - Generic message descriptor for arrays of messages + * @start: Pointer to message start + * @l4_start: Pointer to L4 header + * @len: Message length, with L2 headers + * @l4_len: Message length, with L4 headers + */ +struct tap_msg { + char *start; + char *l4h; + size_t len; + size_t l4_len; +}; + #include "icmp.h" #include "tcp.h" @@ -583,7 +583,7 @@ static int tcp_sock_hash_match(struct tcp_conn *conn, int af, void *addr, static unsigned int tcp_sock_hash(struct ctx *c, int af, void *addr, in_port_t tap_port, in_port_t sock_port) { - uint64_t b; + uint64_t b = 0; if (af == AF_INET) { struct { @@ -853,7 +853,7 @@ static uint32_t tcp_seq_init(struct ctx *c, int af, void *addr, in_port_t dstport, in_port_t srcport) { struct 
timespec ts = { 0 }; - uint32_t ns, seq; + uint32_t ns, seq = 0; clock_gettime(CLOCK_MONOTONIC, &ts); @@ -1186,31 +1186,39 @@ out: * tcp_tap_handler() - Handle packets from tap and state transitions * @c: Execution context * @af: Address family, AF_INET or AF_INET6 - * @in: Input buffer - * @len: Length, including TCP header + * @msg: Input messages + * @count: Message count + * + * Return: count of consumed packets */ -void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) +int tcp_tap_handler(struct ctx *c, int af, void *addr, + struct tap_msg *msg, int count) { - struct tcphdr *th = (struct tcphdr *)in; + /* TODO: Implement message batching for TCP */ + struct tcphdr *th = (struct tcphdr *)msg[0].l4h; + size_t len = msg[0].l4_len; + size_t off, skip = 0; int s, ws; + (void)count; + if (len < sizeof(*th)) - return; + return 1; off = th->doff * 4; if (off < sizeof(*th) || off > len) - return; + return 1; if ((s = tcp_sock_hash_lookup(c, af, addr, th->source, th->dest)) < 0) { if (th->syn) tcp_conn_from_tap(c, af, addr, th, len); - return; + return 1; } if (th->rst) { tcp_close_and_epoll_del(c, s); - return; + return 1; } tcp_clamp_window(s, th, len, th->syn && th->ack); @@ -1224,7 +1232,7 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) case SOCK_SYN_SENT: if (!th->syn || !th->ack) { tcp_rst(c, s); - return; + return 1; } tc[s].mss_guest = tcp_opt_get(th, len, OPT_MSS, NULL, NULL); @@ -1234,12 +1242,12 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) ws = tcp_opt_get(th, len, OPT_WS, NULL, NULL); if (ws > MAX_WS) { if (tcp_send_to_tap(c, s, RST, NULL, 0)) - return; + return 1; tc[s].seq_to_tap = 0; tc[s].ws_allowed = 0; tcp_send_to_tap(c, s, SYN, NULL, 0); - return; + return 1; } /* info.tcpi_bytes_acked already includes one byte for SYN, but @@ -1261,7 +1269,7 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) if (!th->ack) { tcp_rst(c, s); - 
return; + return 1; } tcp_set_state(s, ESTABLISHED); @@ -1294,7 +1302,8 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) } if (skip < len - off && - tcp_send_to_sock(c, s, in + off + skip, len - off - skip, + tcp_send_to_sock(c, s, + msg[0].l4h + off + skip, len - off - skip, th->psh ? 0 : MSG_MORE)) break; @@ -1311,7 +1320,8 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) tcp_sock_consume(s, ntohl(th->ack_seq)); if (skip < len - off && - tcp_send_to_sock(c, s, in + off + skip, len - off - skip, + tcp_send_to_sock(c, s, + msg[0].l4h + off + skip, len - off - skip, th->psh ? 0 : MSG_MORE)) break; @@ -1331,6 +1341,8 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) case CLOSED: /* ;) */ break; } + + return 1; } /** @@ -4,7 +4,8 @@ struct ctx; void tcp_sock_handler(struct ctx *c, int s, uint32_t events); -void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len); +int tcp_tap_handler(struct ctx *c, int af, void *addr, + struct tap_msg *msg, int count); int tcp_sock_init(struct ctx *c); void tcp_timer(struct ctx *c, struct timespec *ts); @@ -23,6 +23,7 @@ * */ +#define _GNU_SOURCE #include <stdio.h> #include <errno.h> #include <limits.h> @@ -35,6 +36,7 @@ #include <sys/epoll.h> #include <sys/types.h> #include <sys/socket.h> +#include <sys/uio.h> #include <unistd.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -105,43 +107,72 @@ void udp_sock_handler(struct ctx *c, int s, uint32_t events) * udp_tap_handler() - Handle packets from tap * @c: Execution context * @af: Address family, AF_INET or AF_INET6 - * @in: Input buffer - * @len: Length, including UDP header + * @msg: Input messages + * @count: Message count + * + * Return: count of consumed packets */ -void udp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len) +int udp_tap_handler(struct ctx *c, int af, void *addr, + struct tap_msg *msg, int count) { - struct udphdr *uh = (struct 
udphdr *)in; - int s; + /* The caller already checks that all the messages have the same source + * and destination, so we can just take those from the first message. + */ + struct udphdr *uh = (struct udphdr *)msg[0].l4h; + struct mmsghdr mm[UIO_MAXIOV] = { 0 }; + struct iovec m[UIO_MAXIOV]; + struct sockaddr_in6 s_in6; + struct sockaddr_in s_in; + struct sockaddr *sa; + socklen_t sl; + int i, s; (void)c; if (af == AF_INET) { - struct sockaddr_in sa = { + s_in = (struct sockaddr_in) { .sin_family = AF_INET, .sin_port = uh->dest, + .sin_addr = *(struct in_addr *)addr, }; - if (!(s = udp4_sock_port[ntohs(uh->source)])) - return; - - sa.sin_addr = *(struct in_addr *)addr; - - sendto(s, in + sizeof(*uh), len - sizeof(*uh), MSG_DONTWAIT, - (struct sockaddr *)&sa, sizeof(sa)); + sa = (struct sockaddr *)&s_in; + sl = sizeof(s_in); } else if (af == AF_INET6) { - struct sockaddr_in6 sa = { + s_in6 = (struct sockaddr_in6) { .sin6_family = AF_INET6, .sin6_port = uh->dest, .sin6_addr = *(struct in6_addr *)addr, }; - if (!(s = udp6_sock_port[ntohs(uh->source)])) - return; + sa = (struct sockaddr *)&s_in6; + sl = sizeof(s_in6); + } else { + return count; + } + + for (i = 0; i < count; i++) { + m[i].iov_base = (char *)((struct udphdr *)msg[i].l4h + 1); + m[i].iov_len = msg[i].l4_len - sizeof(*uh); - sendto(s, in + sizeof(*uh), len - sizeof(*uh), - MSG_DONTWAIT | MSG_NOSIGNAL, - (struct sockaddr *)&sa, sizeof(sa)); + mm[i].msg_hdr.msg_name = sa; + mm[i].msg_hdr.msg_namelen = sl; + + mm[i].msg_hdr.msg_iov = m + i; + mm[i].msg_hdr.msg_iovlen = 1; + } + + if (af == AF_INET) { + if (!(s = udp4_sock_port[ntohs(uh->source)])) + return count; + } else if (af == AF_INET6) { + if (!(s = udp6_sock_port[ntohs(uh->source)])) + return count; + } else { + return count; } + + return sendmmsg(s, mm, count, MSG_DONTWAIT | MSG_NOSIGNAL); } /** @@ -1,3 +1,4 @@ void udp_sock_handler(struct ctx *c, int s, uint32_t events); -void udp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t 
len); +int udp_tap_handler(struct ctx *c, int af, void *addr, + struct tap_msg *msg, int count); int udp_sock_init(struct ctx *c); |