diff options
author | Stefano Brivio <sbrivio@redhat.com> | 2021-02-21 11:33:38 +0100 |
---|---|---|
committer | Stefano Brivio <sbrivio@redhat.com> | 2021-02-21 11:55:49 +0100 |
commit | 8bca388e8a771d069b2a2d4ac47589112f6f0af3 (patch) | |
tree | 6e934d87ab33ef3116da032c3f5acb044c0cc6cc /passt.c | |
parent | 105b916361ca6e9e63112444c323cc193303120c (diff) | |
download | passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.tar passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.tar.gz passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.tar.bz2 passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.tar.lz passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.tar.xz passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.tar.zst passt-8bca388e8a771d069b2a2d4ac47589112f6f0af3.zip |
passt: Assorted fixes from "fresh eyes" review
A bunch of fixes not worth single commits at this stage, notably:
- make buffer, length parameter ordering consistent in ARP, DHCP,
NDP handlers
- strict checking of buffer, message and option length in DHCP
handler (a malicious client could have easily crashed it)
- set up forwarding for IPv4 and IPv6, and masquerading with nft for
IPv4, from demo script
- get rid of separate slow and fast timers: keeping them separate
doesn't save any overhead
- stricter checking of buffer lengths as passed to tap handlers
- proper dequeuing from qemu socket back-end: I accidentally trashed
messages that were bundled up together in a single tap read
operation -- the length header tells us the size of the next
frame, but there's no apparent limit to the number of messages we
get with a single receive
- rework some bits of the TCP state machine; passive and active
connection closes now appear to be robust -- introduce a new
FIN_WAIT_1_SOCK_FIN state indicating a FIN_WAIT_1 with a FIN flag
from the socket
- streamline TCP option parsing routine
- track TCP state changes to stderr (this is temporary, proper
debugging and syslogging support pending)
- observe that multiplying a number by four might very well change
its value, and this happens to be the case for the data offset
from the TCP header as we check if it's the same as the total
length to find out if it's a duplicated ACK segment
- recent estimates suggest that the duration of a millisecond is
closer to a million nanoseconds than a thousand of them, this
trend is now reflected into the timespec_diff_ms() convenience
routine
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'passt.c')
-rw-r--r-- | passt.c | 148 |
1 files changed, 74 insertions, 74 deletions
@@ -51,9 +51,7 @@ #define EPOLL_EVENTS 10 -#define EPOLL_TIMEOUT 100 /* ms, for protocol periodic handlers */ -#define PERIODIC_HANDLER_FAST 100 -#define PERIODIC_HANDLER_SLOW 1000 +#define TIMER_INTERVAL 20 /* ms, for protocol periodic handlers */ /** * sock_unix() - Create and bind AF_UNIX socket, add to epoll list @@ -294,7 +292,7 @@ static void get_dns(struct ctx *c) } /** - * tap4_handler() - IPv4 packet handler for tap file descriptor + * tap4_handler() - IPv4 and ARP packet handler for tap file descriptor * @c: Execution context * @len: Total L2 packet length * @in: Packet buffer, L2 headers @@ -303,12 +301,18 @@ static void tap4_handler(struct ctx *c, char *in, size_t len) { struct ethhdr *eh = (struct ethhdr *)in; struct iphdr *iph = (struct iphdr *)(eh + 1); - char *l4h = (char *)iph + iph->ihl * 4; char buf_s[BUFSIZ], buf_d[BUFSIZ]; + char *l4h; + + if (arp(c, eh, len) || dhcp(c, eh, len)) + return; - if (arp(c, len, eh) || dhcp(c, len, eh)) + if (len < sizeof(*eh) + sizeof(*iph)) return; + l4h = (char *)iph + iph->ihl * 4; + len -= (intptr_t)l4h - (intptr_t)eh; + if (iph->protocol == IPPROTO_ICMP) { fprintf(stderr, "icmp from tap: %s -> %s\n", inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)), @@ -316,6 +320,9 @@ static void tap4_handler(struct ctx *c, char *in, size_t len) } else { struct tcphdr *th = (struct tcphdr *)l4h; + if (len < sizeof(*th) && len < sizeof(struct udphdr)) + return; + fprintf(stderr, "%s from tap: %s:%i -> %s:%i\n", getprotobynumber(iph->protocol)->p_name, inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)), @@ -324,8 +331,6 @@ static void tap4_handler(struct ctx *c, char *in, size_t len) ntohs(th->dest)); } - len -= (intptr_t)l4h - (intptr_t)eh; - if (iph->protocol == IPPROTO_TCP) tcp_tap_handler(c, AF_INET, &iph->daddr, l4h, len); else if (iph->protocol == IPPROTO_UDP) @@ -346,33 +351,21 @@ static void tap6_handler(struct ctx *c, char *in, size_t len) uint8_t proto; char *l4h; - if (ndp(c, len, eh)) + if (len < sizeof(*eh) 
+ sizeof(*ip6h)) + return; + + if (ndp(c, eh, len)) return; l4h = ipv6_l4hdr(ip6h, &proto); /* TODO: Assign MAC address to guest so that, together with prefix - * assigned via NDP, address matches the one on the host. Then drop - * address change and checksum recomputation. + * assigned via NDP, address matches the one from the host. */ c->addr6_guest = ip6h->saddr; ip6h->saddr = c->addr6; - if (proto == IPPROTO_TCP) { - struct tcphdr *th = (struct tcphdr *)(ip6h + 1); - - th->check = 0; - th->check = csum_ip4(ip6h, len + sizeof(*ip6h)); - } else if (proto == IPPROTO_UDP) { - struct udphdr *uh = (struct udphdr *)(ip6h + 1); - - uh->check = 0; - uh->check = csum_ip4(ip6h, len + sizeof(*ip6h)); - } else if (proto == IPPROTO_ICMPV6) { - struct icmp6hdr *ih = (struct icmp6hdr *)(ip6h + 1); - ih->icmp6_cksum = 0; - ih->icmp6_cksum = csum_ip4(ip6h, len + sizeof(*ip6h)); - } + len -= (intptr_t)l4h - (intptr_t)eh; if (proto == IPPROTO_ICMPV6) { fprintf(stderr, "icmpv6 from tap: %s ->\n\t%s\n", @@ -382,6 +375,9 @@ static void tap6_handler(struct ctx *c, char *in, size_t len) } else { struct tcphdr *th = (struct tcphdr *)l4h; + if (len < sizeof(*th) && len < sizeof(struct udphdr)) + return; + fprintf(stderr, "%s from tap: [%s]:%i\n" "\t-> [%s]:%i\n", getprotobynumber(proto)->p_name, @@ -391,8 +387,6 @@ static void tap6_handler(struct ctx *c, char *in, size_t len) ntohs(th->dest)); } - len -= (intptr_t)l4h - (intptr_t)eh; - if (proto == IPPROTO_TCP) tcp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len); else if (proto == IPPROTO_UDP) @@ -400,19 +394,46 @@ static void tap6_handler(struct ctx *c, char *in, size_t len) } /** - * tap_handler() - IPv4/IPv6/ARP packet handler for tap file descriptor + * tap_handler() - Packet handler for tap file descriptor * @c: Execution context - * @len: Total L2 packet length - * @in: Packet buffer, L2 headers + * + * Return: -ECONNRESET if tap connection was lost, 0 otherwise */ -static void tap_handler(struct ctx *c, char *in, size_t len) 
+static int tap_handler(struct ctx *c) { - struct ethhdr *eh = (struct ethhdr *)in; + char buf[ETH_MAX_MTU]; + struct ethhdr *eh; + uint32_t vnet_len; + ssize_t n; + + eh = (struct ethhdr *)buf; - if (eh->h_proto == ntohs(ETH_P_IP) || eh->h_proto == ntohs(ETH_P_ARP)) - tap4_handler(c, in, len); - else if (eh->h_proto == ntohs(ETH_P_IPV6)) - tap6_handler(c, in, len); + while ((n = recv(c->fd_unix, &vnet_len, 4, MSG_DONTWAIT)) == 4) { + n = recv(c->fd_unix, buf, ntohl(vnet_len), MSG_DONTWAIT); + + if (n < (ssize_t)sizeof(*eh)) + break; + + switch (ntohs(eh->h_proto)) { + case ETH_P_IP: + case ETH_P_ARP: + tap4_handler(c, buf, n); + break; + case ETH_P_IPV6: + tap6_handler(c, buf, n); + break; + default: + break; + } + } + + if (n >= 0 || errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) + return 0; + + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_unix, NULL); + close(c->fd_unix); + + return -ECONNRESET; } /** @@ -429,29 +450,30 @@ static void sock_handler(struct ctx *c, int fd, uint32_t events) sl = sizeof(so); if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &so, &sl) || - so == SOCK_STREAM) + so == SOCK_STREAM) { + fprintf(stderr, "TCP: packet from socket %i\n", fd); tcp_sock_handler(c, fd, events); - else if (so == SOCK_DGRAM) + } + else if (so == SOCK_DGRAM) { udp_sock_handler(c, fd, events); + fprintf(stderr, "UDP: packet from socket %i\n", fd); + } } /** - * periodic_handler() - Run periodic tasks for L4 protocol handlers + * timer_handler() - Run periodic tasks for L4 protocol handlers * @c: Execution context * @last: Timestamp of last run, updated on return */ -static void periodic_handler(struct ctx *c, struct timespec *last) +static void timer_handler(struct ctx *c, struct timespec *last) { struct timespec tmp; - int elapsed_ms; clock_gettime(CLOCK_MONOTONIC, &tmp); - elapsed_ms = timespec_diff_ms(&tmp, last); + if (timespec_diff_ms(&tmp, last) < TIMER_INTERVAL) + return; - if (elapsed_ms >= PERIODIC_HANDLER_FAST) - tcp_periodic_fast(c); - if (elapsed_ms >= 
PERIODIC_HANDLER_SLOW) - tcp_periodic_slow(c); + tcp_timer(c, &tmp); *last = tmp; } @@ -481,10 +503,8 @@ int main(int argc, char **argv) struct epoll_event events[EPOLL_EVENTS]; struct epoll_event ev = { 0 }; struct timespec last_time; - char buf[ETH_MAX_MTU]; struct ctx c = { 0 }; - int nfds, i, len; - int fd_unix; + int nfds, i, fd_unix; if (argc != 1) usage(argv[0]); @@ -537,14 +557,14 @@ listen: "./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio\n\n"); c.fd_unix = accept(fd_unix, NULL, NULL); - ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP; + ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP | EPOLLERR | EPOLLHUP; ev.data.fd = c.fd_unix; epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev); clock_gettime(CLOCK_MONOTONIC, &last_time); loop: - nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, EPOLL_TIMEOUT); + nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL); if (nfds == -1 && errno != EINTR) { perror("epoll_wait"); exit(EXIT_FAILURE); @@ -552,36 +572,16 @@ loop: for (i = 0; i < nfds; i++) { if (events[i].data.fd == c.fd_unix) { - len = recv(events[i].data.fd, buf, sizeof(buf), - MSG_DONTWAIT); - - if (len <= 0) { - epoll_ctl(c.epollfd, EPOLL_CTL_DEL, c.fd_unix, - &ev); - close(c.fd_unix); + if (tap_handler(&c)) goto listen; - } - - if (len == 0 || (len < 0 && errno == EINTR)) - continue; - - if (len < 0) { - if (errno == EAGAIN || errno == EWOULDBLOCK) - break; - goto out; - } - - tap_handler(&c, buf + 4, ntohl(*(uint32_t *)buf)); } else { sock_handler(&c, events[i].data.fd, events[i].events); } } - periodic_handler(&c, &last_time); - clock_gettime(CLOCK_MONOTONIC, &last_time); + timer_handler(&c, &last_time); goto loop; -out: return 0; } |