diff options
Diffstat (limited to 'checksum.c')
| -rw-r--r-- | checksum.c | 193 |
1 files changed, 152 insertions, 41 deletions
@@ -59,6 +59,7 @@ #include "util.h" #include "ip.h" #include "checksum.h" +#include "iov.h" /* Checksums are optional for UDP over IPv4, so we usually just set * them to 0. Change this to 1 to calculate real UDP over IPv4 @@ -84,7 +85,7 @@ */ /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ __attribute__((optimize("-fno-strict-aliasing"))) -uint32_t sum_16b(const void *buf, size_t len) +static uint32_t sum_16b(const void *buf, size_t len) { const uint16_t *p = buf; uint32_t sum = 0; @@ -106,7 +107,7 @@ uint32_t sum_16b(const void *buf, size_t len) * * Return: 16-bit folded sum */ -uint16_t csum_fold(uint32_t sum) +static uint16_t csum_fold(uint32_t sum) { while (sum >> 16) sum = (sum & 0xffff) + (sum >> 16); @@ -144,7 +145,7 @@ uint16_t csum_ip4_header(uint16_t l3len, uint8_t protocol, * @proto: Protocol number * @saddr: Source address * @daddr: Destination address - * Returns: Partial checksum of the IPv4 header + * Return: partial checksum of the IPv4 header */ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, struct in_addr saddr, struct in_addr daddr) @@ -161,26 +162,41 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, } /** + * csum() - Compute TCP/IP-style checksum + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 16-bit folded, complemented checksum + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +static uint16_t csum(const void *buf, size_t len, uint32_t init) +{ + return (uint16_t)~csum_fold(csum_unfolded(buf, len, init)); +} + +/** * csum_udp4() - Calculate and set checksum for a UDP over IPv4 packet * @udp4hr: UDP header, initialised apart from checksum * @saddr: IPv4 source address * @daddr: IPv4 destination address - * @payload: UDP packet payload - * @dlen: Length of @payload (not including UDP header) + * @data: UDP payload (as IO vector tail) */ void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - const void *payload, size_t dlen) + struct iov_tail *data) { /* UDP checksums are optional, so don't bother */ udp4hr->check = 0; if (UDP4_REAL_CHECKSUMS) { - uint16_t l4len = dlen + sizeof(struct udphdr); + uint16_t l4len = iov_tail_size(data) + sizeof(struct udphdr); uint32_t psum = proto_ipv4_header_psum(l4len, IPPROTO_UDP, saddr, daddr); + psum = csum_unfolded(udp4hr, sizeof(struct udphdr), psum); - udp4hr->check = csum(payload, dlen, psum); + udp4hr->check = csum_iov_tail(data, psum); } } @@ -209,7 +225,7 @@ void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t dlen) * @proto: Protocol number * @saddr: Source address * @daddr: Destination address - * Returns: Partial checksum of the IPv6 header + * Return: partial checksum of the IPv6 header */ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, const struct in6_addr *saddr, @@ -226,19 +242,22 @@ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, /** * csum_udp6() - Calculate and set checksum for a UDP over IPv6 packet * @udp6hr: UDP header, initialised apart from checksum - * @payload: UDP packet payload - * @dlen: Length of @payload (not including UDP header) + * @saddr: Source address + * @daddr: Destination address + * @data: UDP payload (as IO vector tail) */ void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - const void *payload, size_t dlen) + struct iov_tail *data) { - uint32_t psum = proto_ipv6_header_psum(dlen + sizeof(struct udphdr), - IPPROTO_UDP, saddr, daddr); + uint16_t l4len = iov_tail_size(data) + sizeof(struct udphdr); + uint32_t psum = proto_ipv6_header_psum(l4len, IPPROTO_UDP, + saddr, daddr); + udp6hr->check = 0; psum = csum_unfolded(udp6hr, sizeof(struct udphdr), psum); - udp6hr->check = csum(payload, dlen, psum); + udp6hr->check = csum_iov_tail(data, psum); } /** @@ -262,7 +281,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, icmp6hr->icmp6_cksum = csum(payload, dlen, psum); } -#ifdef __AVX2__ +#if defined(__AVX2__) #include <immintrin.h> /** @@ -433,7 +452,7 @@ less_than_128_bytes: } /** - * csum_unfolded - Calculate the unfolded checksum of a data buffer. + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * * @buf: Input buffer * @len: Input length @@ -448,7 +467,8 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i)); unsigned int pad = align - (intptr_t)buf; - if (len < pad) + /* Don't mix sum_16b() and csum_avx2() with odd padding lengths */ + if (pad & 1 || len < pad) pad = len; if (pad) @@ -459,9 +479,97 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) return init; } -#else /* __AVX2__ */ +#elif defined(__POWER9_VECTOR__) || defined(__POWER8_VECTOR__) +#include <altivec.h> + /** - * csum_unfolded - Calculate the unfolded checksum of a data buffer. + * csum_vsx() - Compute 32-bit checksum using VSX SIMD instructions + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit checksum, not complemented, not folded + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +static uint32_t csum_vsx(const void *buf, size_t len, uint32_t init) +{ + const uint8_t *p = buf; + vector unsigned int sum_even = vec_splat_u32(0); + vector unsigned int sum_odd = vec_splat_u32(0); + const vector unsigned short ones = vec_splat_u16(1); + uint64_t sum64 = init; + +#ifdef __POWER9_VECTOR__ + while (len >= 64) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned char v2b = vec_vsx_ld(32, p); + vector unsigned char v3b = vec_vsx_ld(48, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + vector unsigned short v2 = (vector unsigned short)v2b; + vector unsigned short v3 = (vector unsigned short)v3b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, ones)); + sum_even = vec_add(sum_even, vec_mule(v2, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v2, ones)); + sum_even = vec_add(sum_even, vec_mule(v3, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v3, ones)); + + p += 64; + len -= 64; + } +#endif + + while (len >= 32) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, ones)); + + p += 32; + len -= 32; + } + + while (len >= 16) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned short v0 = (vector unsigned short)v0b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + + p += 16; + len -= 16; + } + + { + vector unsigned int sum32 = vec_add(sum_even, sum_odd); + uint32_t partial[4] __attribute__((aligned(16))); + + vec_st(sum32, 0, partial); + sum64 += (uint64_t)partial[0] + partial[1] + + partial[2] + partial[3]; + } + + sum64 += sum_16b(p, len); + + sum64 = (sum64 >> 32) + (sum64 & 0xffffffff); + sum64 += sum64 >> 32; + + return (uint32_t)sum64; +} + +/** + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * * @buf: Input buffer * @len: Input length @@ -473,41 +581,44 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) __attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) { - return sum_16b(buf, len) + init; + return csum_vsx(buf, len, init); } -#endif /* !__AVX2__ */ - +#else /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** - * csum() - Compute TCP/IP-style checksum - * @buf: Input buffer - * @len: Input length - * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * - * Return: 16-bit folded, complemented checksum + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit unfolded checksum */ /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ __attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ -uint16_t csum(const void *buf, size_t len, uint32_t init) +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) { - return (uint16_t)~csum_fold(csum_unfolded(buf, len, init)); + return sum_16b(buf, len) + init; } +#endif /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** - * csum_iov() - Calculates the unfolded checksum over an array of IO vectors - * - * @iov Pointer to the array of IO vectors - * @n Length of the array + * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector + * @tail: IO vector tail to checksum * @init Initial 32-bit checksum, 0 for no pre-computed checksum * * Return: 16-bit folded, complemented checksum */ -/* cppcheck-suppress unusedFunction */ -uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init) +uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init) { - unsigned int i; - - for (i = 0; i < n; i++) - init = csum_unfolded(iov[i].iov_base, iov[i].iov_len, init); - + if (iov_tail_prune(tail)) { + size_t i; + + init = csum_unfolded((char *)tail->iov[0].iov_base + tail->off, + tail->iov[0].iov_len - tail->off, init); + for (i = 1; i < tail->cnt; i++) { + const struct iovec *iov = &tail->iov[i]; + init = csum_unfolded(iov->iov_base, iov->iov_len, init); + } + } return (uint16_t)~csum_fold(init); } |
