5#ifndef RTE_XXH64_AVX512_H
6#define RTE_XXH64_AVX512_H
/*
 * 64-bit multiplicative constants used by the hashing helpers in this file.
 * The values are the five primes published for the XXH64 algorithm.
 *
 * NOTE(review): the decimal prefix fused onto each line below (16, 18, ...)
 * is not valid C; it looks like line-number residue from a lossy
 * extraction - confirm against the pristine file.
 */
16static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
18static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
20static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL;
22static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;
24static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL;
/*
 * Per-lane hash round over a 512-bit vector: a multiply-accumulate with
 * PRIME64_2 followed by a left rotation of every 64-bit lane by 31 bits.
 *
 * NOTE(review): this view is incomplete - the return type, the braces, the
 * middle operand of the multiply-accumulate and the return statement are
 * not visible, and the leading digits on each line look like extraction
 * residue. Confirm against the pristine file before relying on this text.
 */
27xxh64_round_avx512(__m512i hash, __m512i input)
/*
 * _mm512_madd52lo_epu64 multiplies only the low 52 bits of its two
 * multiplicand operands and adds the low 52 bits of the product to the
 * first operand, so this is not a full 64-bit multiply.
 * NOTE(review): presumably an intentional approximation of the XXH64
 * round - confirm.
 */
29 hash = _mm512_madd52lo_epu64(hash,
31 _mm512_set1_epi64(PRIME64_2));
/* Rotate each 64-bit lane left by 31 bits. */
33 hash = _mm512_rol_epi64(hash, 31);
/*
 * Final mixing step applied to every 64-bit lane of a 512-bit hash vector.
 *
 * NOTE(review): only the first mixing statement is visible here; the return
 * type, braces, any further mixing steps and the return statement are
 * missing from this view, and the leading digits look like extraction
 * residue - confirm against the pristine file.
 */
39xxh64_fmix_avx512(__m512i hash)
/* XOR each lane with itself logically shifted right by 33 bits. */
41 hash = _mm512_xor_si512(hash, _mm512_srli_epi64(hash, 33));
/*
 * Hash one key with eight seeds at once (one seed per 64-bit lane of
 * v_seed) and reduce each lane to a 32-bit value masked with (modulo - 1).
 *
 * key      buffer to hash; read in 8-byte steps while at least 8 bytes
 *          remain, then in smaller steps whose load width is not visible
 *          in this view.
 * key_len  length of key in bytes; also folded into the initial hash state.
 * v_seed   eight 64-bit seeds, one per lane.
 * modulo   range of the result; the reduction is a bitwise AND with
 *          (modulo - 1), which equals "hash % modulo" only when modulo is
 *          a power of two. NOTE(review): presumably callers guarantee
 *          that - confirm.
 *
 * Returns the eight masked lanes narrowed to 32 bits each (the result type
 * of _mm512_cvtepi64_epi32).
 *
 * NOTE(review): this view is incomplete. The return type, the braces, the
 * declarations of `input` and `offset`, the updates of `remaining` and
 * `offset`, and several call arguments are not visible, and the leading
 * digits on most lines look like extraction residue. Confirm every comment
 * below against the pristine file.
 */
47rte_xxh64_sketch_avx512(
const void *key, uint32_t key_len,
48 __m512i v_seed, uint32_t modulo)
50 __m512i v_prime64_5, v_hash;
51 size_t remaining = key_len;
/* Initial state per lane: seed + PRIME64_5 + key_len. */
55 v_prime64_5 = _mm512_set1_epi64(PRIME64_5);
56 v_hash = _mm512_add_epi64
57 (_mm512_add_epi64(v_seed, v_prime64_5),
58 _mm512_set1_epi64(key_len));
/* Consume the key 8 bytes at a time. */
60 while (remaining >= 8) {
/*
 * Broadcast the next 8 key bytes to all lanes.
 * NOTE(review): the cast drops the const qualifier and reads the buffer
 * through a uint64_t pointer; no alignment check is visible here.
 */
61 input = _mm512_set1_epi64(*(uint64_t *)
RTE_PTR_ADD(key, offset));
/* Mix the round of (0, input) into the running hash. */
62 v_hash = _mm512_xor_epi64(v_hash,
63 xxh64_round_avx512(_mm512_setzero_si512(), input));
/*
 * PRIME64_4 + low 52 bits of (<operand not visible> * PRIME64_1).
 * _mm512_madd52lo_epu64 uses only the low 52 bits of its multiplicands.
 */
64 v_hash = _mm512_madd52lo_epu64(_mm512_set1_epi64(PRIME64_4),
66 _mm512_set1_epi64(PRIME64_1));
/*
 * Next step: mix `input` scaled by PRIME64_1, then rotate left by 23 and
 * multiply-accumulate with PRIME64_2 onto PRIME64_3.
 * NOTE(review): the argument of the broadcast and the enclosing condition
 * are not visible - presumably a 4-byte tail step; confirm.
 */
73 input = _mm512_set1_epi64
75 v_hash = _mm512_xor_epi64(v_hash,
76 _mm512_mullo_epi64(input,
77 _mm512_set1_epi64(PRIME64_1)));
78 v_hash = _mm512_madd52lo_epu64
79 (_mm512_set1_epi64(PRIME64_3),
80 _mm512_rol_epi64(v_hash, 23),
81 _mm512_set1_epi64(PRIME64_2));
/*
 * Drain whatever is left: mix `input` scaled by PRIME64_5, rotate left by
 * 11 and multiply by PRIME64_1 (full 64-bit low multiply here).
 * NOTE(review): the load feeding `input` and the decrement of `remaining`
 * are not visible - presumably one byte per iteration; confirm.
 */
87 while (remaining != 0) {
88 input = _mm512_set1_epi64
90 v_hash = _mm512_xor_epi64(v_hash,
91 _mm512_mullo_epi64(input,
92 _mm512_set1_epi64(PRIME64_5)));
93 v_hash = _mm512_mullo_epi64
94 (_mm512_rol_epi64(v_hash, 11),
95 _mm512_set1_epi64(PRIME64_1));
/* Final mixing of every lane. */
100 v_hash = xxh64_fmix_avx512(v_hash);
/* Broadcast (modulo - 1) as the reduction mask. */
108 __m512i v_hash_remainder = _mm512_set1_epi64((modulo - 1));
/* Mask each lane, then narrow the eight 64-bit lanes to 32 bits. */
110 return _mm512_cvtepi64_epi32(_mm512_and_si512(v_hash, v_hash_remainder));
/*
 * Both macros below are defined with an empty replacement list, so from
 * this point on every use expands to nothing.
 * NOTE(review): they appear after the code that uses them; presumably they
 * are placeholders (for example for a documentation or indexing tool) and
 * the real definitions come from a project header - confirm.
 */
#define RTE_PTR_ADD(ptr, x)
#define __rte_always_inline