diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2021-04-23 22:22:37 +0200 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2021-04-23 22:22:37 +0200 |
commit | 38b50dba4704856194ac02b98e492d2349d64058 (patch) | |
tree | b7f7a12479aa53f9e7ac2cae7a4e709ce78d99a5 /passt.c | |
parent | 962bc97cf116519bd11b7e8beeda6dcce033d537 (diff) | |
download | passt-38b50dba4704856194ac02b98e492d2349d64058.tar passt-38b50dba4704856194ac02b98e492d2349d64058.tar.gz passt-38b50dba4704856194ac02b98e492d2349d64058.tar.bz2 passt-38b50dba4704856194ac02b98e492d2349d64058.tar.lz passt-38b50dba4704856194ac02b98e492d2349d64058.tar.xz passt-38b50dba4704856194ac02b98e492d2349d64058.tar.zst passt-38b50dba4704856194ac02b98e492d2349d64058.zip |
passt: Spare some syscalls, add some optimisations from profiling
Avoid a bunch of syscalls on forwarding paths by:
- storing minimum and maximum file descriptor numbers for each
protocol, falling back to an SO_PROTOCOL query only when ranges overlap
- allocating a larger receive buffer -- this can result in more
coalesced packets than sendmmsg() can take (UIO_MAXIOV, i.e. 1024),
so make sure we don't exceed that within a single call to protocol
tap handlers
- nesting the handling loop in tap_handler() in the receive loop,
so that we have better chances of filling our receive buffer in
fewer calls
- skipping the recvfrom() in the UDP handler on EPOLLERR -- there's
nothing to be done in that case
and while at it:
- restore the 20ms timer interval for periodic (TCP) events, which I
accidentally changed to 100ms in an earlier commit
- attempt using SO_ZEROCOPY for UDP -- if it's not available,
sendmmsg() will succeed anyway
- fix the handling of the status code from sendmmsg(): if it fails,
we'll try to discard the first message, hence return 1 from the
UDP handler
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'passt.c')
-rw-r--r-- | passt.c | 113 |
1 files changed, 69 insertions, 44 deletions
@@ -57,9 +57,11 @@ #define EPOLL_EVENTS 10 -#define TAP_NMSG 32 /* maximum messages to buffer from tap */ +#define TAP_BUF_BYTES (ETH_MAX_MTU * 8) +#define TAP_BUF_FILL (TAP_BUF_BYTES - ETH_MAX_MTU - sizeof(uint32_t)) +#define TAP_MSGS (TAP_BUF_BYTES / sizeof(struct ethhdr) + 1) -#define TIMER_INTERVAL 100 /* ms, for protocol periodic handlers */ +#define TIMER_INTERVAL 20 /* ms, for protocol periodic handlers */ /** * sock_unix() - Create and bind AF_UNIX socket, add to epoll list @@ -515,7 +517,7 @@ static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count) return 1; } -static char tap_buf[ETH_MAX_MTU * TAP_NMSG]; +static char tap_buf[TAP_BUF_BYTES]; /** * tap_handler() - Packet handler for tap file descriptor @@ -525,32 +527,30 @@ static char tap_buf[ETH_MAX_MTU * TAP_NMSG]; */ static int tap_handler(struct ctx *c) { - int msg_count = 0, same, rcv = 0, i = 0; - struct tap_msg msg[UIO_MAXIOV]; - ssize_t n, rem, fill; + struct tap_msg msg[TAP_MSGS]; + int msg_count, same, i; struct ethhdr *eh; char *p = tap_buf; + ssize_t n, rem; - fill = ETH_MAX_MTU * (TAP_NMSG - 1); + while ((n = recv(c->fd_unix, p, TAP_BUF_FILL, MSG_DONTWAIT)) > 0) { + msg_count = 0; - while ((n = recv(c->fd_unix, p, fill, MSG_DONTWAIT)) > 0) { - fill -= n; - while (n > 0) { + while (n > (ssize_t)sizeof(uint32_t)) { ssize_t len = ntohl(*(uint32_t *)p); p += sizeof(uint32_t); n -= sizeof(uint32_t); if (len < (ssize_t)sizeof(*eh)) - break; + return 0; /* At most one packet might not fit in a single read */ if (len > n) { - rem = recv(c->fd_unix, p + n, fill, + rem = recv(c->fd_unix, p + n, len - n, MSG_DONTWAIT); - rcv = errno; - if (rem <= 0 || rem + n != len) - break; + if ((n += rem) != len) + return 0; } msg[msg_count].start = p; @@ -559,40 +559,49 @@ static int tap_handler(struct ctx *c) n -= len; p += len; } - } - - rcv = errno; - while (i < msg_count) { - eh = (struct ethhdr *)msg[i].start; + i = 0; + while (i < msg_count) { + eh = (struct ethhdr *)msg[i].start; switch 
(ntohs(eh->h_proto)) { - case ETH_P_ARP: - tap4_handler(c, msg + i, 1); - i++; - break; - case ETH_P_IP: - for (same = 1; i + same < msg_count; same++) { - eh = (struct ethhdr *)msg[i + same].start; - if (ntohs(eh->h_proto) != ETH_P_IP) - break; - } + case ETH_P_ARP: + tap4_handler(c, msg + i, 1); + i++; + break; + case ETH_P_IP: + for (same = 1; i + same < msg_count && + same < UIO_MAXIOV; same++) { + struct tap_msg *next = &msg[i + same]; + + eh = (struct ethhdr *)next->start; + if (ntohs(eh->h_proto) != ETH_P_IP) + break; + } + i += tap4_handler(c, msg + i, same); - break; - case ETH_P_IPV6: - for (same = 1; i + same < msg_count; same++) { - eh = (struct ethhdr *)msg[i + same].start; - if (ntohs(eh->h_proto) != ETH_P_IPV6) - break; - } + break; + case ETH_P_IPV6: + for (same = 1; i + same < msg_count && + same < UIO_MAXIOV; same++) { + struct tap_msg *next = &msg[i + same]; + + eh = (struct ethhdr *)next->start; + if (ntohs(eh->h_proto) != ETH_P_IPV6) + break; + } + i += tap6_handler(c, msg + i, same); - break; - default: - i++; - break; + break; + default: + i++; + break; + } } + + p = tap_buf; } - if (n >= 0 || rcv == EINTR || rcv == EAGAIN || rcv == EWOULDBLOCK) + if (n >= 0 || errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) return 0; epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_unix, NULL); @@ -614,8 +623,21 @@ static void sock_handler(struct ctx *c, int fd, uint32_t events) sl = sizeof(so); - if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &so, &sl)) +#define IN(x, proto) (x >= c->proto.fd_min && x <= c->proto.fd_max) + + if (IN(fd, udp) && !IN(fd, icmp) && !IN(fd, tcp)) + so = IPPROTO_UDP; + else if (IN(fd, tcp) && !IN(fd, icmp) && !IN(fd, udp)) + so = IPPROTO_TCP; + else if (IN(fd, icmp) && !IN(fd, udp) && !IN(fd, tcp)) + so = IPPROTO_ICMP; /* Fits ICMPv6 below, too */ + else if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &so, &sl)) { + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, fd, NULL); + close(fd); return; + } + +#undef IN debug("%s: packet from socket 
%i", getprotobynumber(so)->p_name, fd); @@ -771,7 +793,10 @@ loop: for (i = 0; i < nfds; i++) { if (events[i].data.fd == c.fd_unix) { - if (tap_handler(&c)) + if (events[i].events & EPOLLRDHUP || + events[i].events & EPOLLHUP || + events[i].events & EPOLLERR || + tap_handler(&c)) goto listen; } else { sock_handler(&c, events[i].data.fd, events[i].events); |