about | git | code | bugs | lists | chat
path: root/checksum.c
diff options
context:
space:
mode:
author: Stefano Brivio <sbrivio@redhat.com> 2021-10-15 16:59:12 +0200
committer: Stefano Brivio <sbrivio@redhat.com> 2021-10-15 16:59:12 +0200
commit: 74f29d3148d34d166c040e6cf1f626245c0d479a (patch)
tree: f1aea202945e6e1b2f98b69d841e3658b6861555 /checksum.c
parent: 364cc313ea316a1f7d9b3bc0ec312693150cb1ef (diff)
download: passt-74f29d3148d34d166c040e6cf1f626245c0d479a.tar
passt-74f29d3148d34d166c040e6cf1f626245c0d479a.tar.gz
passt-74f29d3148d34d166c040e6cf1f626245c0d479a.tar.bz2
passt-74f29d3148d34d166c040e6cf1f626245c0d479a.tar.lz
passt-74f29d3148d34d166c040e6cf1f626245c0d479a.tar.xz
passt-74f29d3148d34d166c040e6cf1f626245c0d479a.tar.zst
passt-74f29d3148d34d166c040e6cf1f626245c0d479a.zip
checksum: Interleave lo/hi sums while folding into 128-bit sums, drop TODO
I left a TODO and never checked -- this actually seems to slightly improve CPIs on AMD Naples (two 128-bit FMA units glued together).

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Diffstat (limited to 'checksum.c')
-rw-r--r-- checksum.c 6
1 files changed, 3 insertions, 3 deletions
diff --git a/checksum.c b/checksum.c
index 9c8a458..c79c9d7 100644
--- a/checksum.c
+++ b/checksum.c
@@ -217,9 +217,9 @@ static uint32_t csum_avx2(const void *buf, size_t len, uint32_t init)
_mm256_unpacklo_epi32(b, zero));
}
- /* Fold four 256bit sums into one 128-bit sum. TODO */
- sum256 = _mm256_add_epi64(_mm256_add_epi64(sum_a_hi, sum_a_lo),
- _mm256_add_epi64(sum_b_hi, sum_b_lo));
+ /* Fold four 256bit sums into one 128-bit sum. */
+ sum256 = _mm256_add_epi64(_mm256_add_epi64(sum_a_hi, sum_b_lo),
+ _mm256_add_epi64(sum_b_hi, sum_a_lo));
sum128 = _mm_add_epi64(_mm256_extracti128_si256(sum256, 0),
_mm256_extracti128_si256(sum256, 1));