// Externally visible character array holding this translation unit's __FILE__ string.
19extern const char LSH256_AVX_FNAME[] = __FILE__;
21#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23#if defined(CRYPTOPP_AVX2_AVAILABLE)
24# include <emmintrin.h>
25# include <immintrin.h>
29#if (CRYPTOPP_GCC_VERSION >= 40500)
30# include <x86intrin.h>
33ANONYMOUS_NAMESPACE_BEGIN
// Size in bytes of one message block; bounds the buffering logic in the update/final routines.
37const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// Value written to cv_l[0] by lsh256_init_avx2.
40const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;
// Number of 32-bit words in each extern IV table.
43const unsigned int CV_WORD_LEN = 16;
// Step-constant words per step; the step-constant table holds CONST_WORD_LEN * NUM_STEPS words.
44const unsigned int CONST_WORD_LEN = 8;
// Total step count; the step loops in this file run over NUM_STEPS / 2 even/odd pairs.
47const unsigned int NUM_STEPS = 26;
// Rotation amounts passed as the Alpha/Beta template arguments of mix() for even and odd steps.
49const unsigned int ROT_EVEN_ALPHA = 29;
50const unsigned int ROT_EVEN_BETA = 1;
51const unsigned int ROT_ODD_ALPHA = 5;
52const unsigned int ROT_ODD_BETA = 17;
// Algorithm type codes; used as case labels in lsh256_init_avx2.
54const unsigned int LSH_TYPE_256_256 = 0x0000020;
55const unsigned int LSH_TYPE_256_224 = 0x000001C;
// Status codes returned by the lsh256_*_avx2 routines.
62const unsigned int LSH_SUCCESS = 0x0;
65const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
66const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
// Word indices into the caller's word32 state array (algorithm type and buffered-bit count).
70const unsigned int AlgorithmType = 80;
71const unsigned int RemainingBits = 81;
// Declarations only: the two initial-value tables (CV_WORD_LEN words each) and the
// step-constant table (CONST_WORD_LEN words per step). Their definitions are not in this listing.
79extern const word32 LSH256_IV224[CV_WORD_LEN];
80extern const word32 LSH256_IV256[CV_WORD_LEN];
81extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
86ANONYMOUS_NAMESPACE_BEGIN
// Bring the library names used below into this scope.
// NOTE(review): listing numbering jumps 93 -> 104, so further using-declarations are probably missing here.
93using CryptoPP::GetBlock;
104using CryptoPP::LSH::LSH256_IV224;
105using CryptoPP::LSH::LSH256_IV256;
106using CryptoPP::LSH::LSH256_StepConstants;
// View over the caller-owned word32 state array used by the AVX2 routines.
// The visible initializer list places cv_l at word 0, cv_r at word 8, sub_msgs at word 16 and
// last_block (viewed as bytes) at word 48 of `state`.
// NOTE(review): the constructor signature, the braces and most member declarations are not
// present in this listing — confirm against the original file before editing.
108struct LSH256_AVX2_Context
111 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
112 last_block(reinterpret_cast<
byte*>(state+48)),
113 remain_databitlen(remainingBitLength),
114 alg_type(static_cast<lsh_type>(algType)) {}
// Reference member: updates are written through to the object bound from remainingBitLength.
120 lsh_u32& remain_databitlen;
// Names the four sub-message buffers inside the state array: even-left at word 16,
// even-right at word 24, odd-left at word 32 and odd-right at word 40.
// NOTE(review): braces and member declarations are not present in this listing.
124struct LSH256_AVX2_Internal
126 LSH256_AVX2_Internal(
word32* state) :
127 submsg_e_l(state+16), submsg_e_r(state+24),
128 submsg_o_l(state+32), submsg_o_r(state+40) { }
// Returns true when the bits of val selected by mask 0xf0000 are all zero.
// NOTE(review): the name says "LSH512" but the test is "== 0" in an LSH-256 file —
// confirm the intended meaning against the algorithm-type encoding.
152inline bool LSH_IS_LSH512(lsh_uint val) {
153 return (val & 0xf0000) == 0;
// Derives a small bit count from an algorithm type code; get_hash() uses the result as the
// shift amount when masking the last digest byte.
// NOTE(review): the body is not present in this listing.
156inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
// Derives a byte count from an algorithm type code; get_hash() uses the result as the
// number of bytes copied to the output.
// NOTE(review): the body is not present in this listing.
160inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
// Returns LSH_GET_HASHBYTE(val) * 8 minus LSH_GET_SMALL_HASHBIT(val).
164inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
165 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
// Takes and returns a 32-bit word; presumably a little-endian conversion — TODO confirm.
// NOTE(review): the body is not present in this listing.
168inline lsh_u32 loadLE32(lsh_u32 v) {
// Takes a 32-bit word x and a count r; presumably a left rotate — TODO confirm.
// NOTE(review): the body is not present in this listing, and unlike the neighbouring helpers
// this one is not declared inline.
172lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
// Copies one message block into the four sub-message buffers using unaligned 256-bit
// loads and stores: bytes 0..31 -> submsg_e_l, 32..63 -> submsg_e_r,
// 64..95 -> submsg_o_l, 96..127 -> submsg_o_r.
// NOTE(review): listing numbering jumps 177 -> 181; the opening lines of the body are missing.
177inline void load_msg_blk(LSH256_AVX2_Internal* i_state,
const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
181 lsh_u32* submsg_e_l = i_state->submsg_e_l;
182 lsh_u32* submsg_e_r = i_state->submsg_e_r;
183 lsh_u32* submsg_o_l = i_state->submsg_o_l;
184 lsh_u32* submsg_o_r = i_state->submsg_o_r;
186 _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
187 _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
188 _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
189 _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
190 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
191 _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
192 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
193 _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
// Message expansion for an even step: each even sub-message half is replaced by the 32-bit
// lane-wise sum of the matching odd half and a transformed copy of the old even half.
// NOTE(review): listing lines 210 and 214 are missing. The parenthesis count shows a
// two-argument call wrapping (old even half, mask); given the byte-index mask it is presumably
// a byte shuffle — confirm against the original file.
196inline void msg_exp_even(LSH256_AVX2_Internal* i_state)
200 lsh_u32* submsg_e_l = i_state->submsg_e_l;
201 lsh_u32* submsg_e_r = i_state->submsg_e_r;
202 lsh_u32* submsg_o_l = i_state->submsg_o_l;
203 lsh_u32* submsg_o_r = i_state->submsg_o_r;
// The same control mask is applied to the left and right halves.
205 const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
206 0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);
208 _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi32(
209 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
211 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)), mask)));
212 _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi32(
213 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
215 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)), mask)));
// Message expansion for an odd step: each odd sub-message half is replaced by the 32-bit
// lane-wise sum of the matching even half and a transformed copy of the old odd half.
// NOTE(review): listing lines 232 and 236 are missing. The parenthesis count shows a
// two-argument call wrapping (old odd half, mask); presumably a byte shuffle — confirm
// against the original file.
218inline void msg_exp_odd(LSH256_AVX2_Internal* i_state)
222 lsh_u32* submsg_e_l = i_state->submsg_e_l;
223 lsh_u32* submsg_e_r = i_state->submsg_e_r;
224 lsh_u32* submsg_o_l = i_state->submsg_o_l;
225 lsh_u32* submsg_o_r = i_state->submsg_o_r;
// Same mask value as in msg_exp_even.
227 const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
228 0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);
230 _mm256_storeu_si256(M256_CAST(submsg_o_l+0), _mm256_add_epi32(
231 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
233 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)), mask)));
234 _mm256_storeu_si256(M256_CAST(submsg_o_r+0), _mm256_add_epi32(
235 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
237 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)), mask)));
// Points *p_const_v at element i of the step-constant table; nothing is copied.
// i is a word index: callers in this file pass multiples of 8.
// NOTE(review): listing numbering jumps 240 -> 244; the opening lines of the body are missing.
240inline void load_sc(
const lsh_u32** p_const_v,
size_t i)
244 *p_const_v = &LSH256_StepConstants[i];
// XORs the even sub-message into the chaining value in place:
// cv_l ^= submsg_e_l and cv_r ^= submsg_e_r (eight 32-bit words each).
// NOTE(review): listing numbering jumps 247 -> 251; the opening lines of the body are missing.
247inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
251 lsh_u32* submsg_e_l = i_state->submsg_e_l;
252 lsh_u32* submsg_e_r = i_state->submsg_e_r;
254 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_xor_si256(
255 _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)),
256 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0))));
257 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_xor_si256(
258 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
259 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0))));
// XORs the odd sub-message into the chaining value in place:
// cv_l ^= submsg_o_l and cv_r ^= submsg_o_r (eight 32-bit words each).
// NOTE(review): listing numbering jumps 262 -> 266; the opening lines of the body are missing.
262inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
266 lsh_u32* submsg_o_l = i_state->submsg_o_l;
267 lsh_u32* submsg_o_r = i_state->submsg_o_r;
269 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
270 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
271 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
272 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
273 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
274 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
// Adds cv_r into cv_l in place as eight independent 32-bit lanes; cv_r is only read.
277inline void add_blk(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
279 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi32(
280 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
281 _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
// Rotates each of the eight 32-bit words of cv left by the compile-time amount R, in place,
// by OR-ing a left shift by R with a right shift by 32-R.
284template <
unsigned int R>
285inline void rotate_blk(lsh_u32 cv[8])
287 _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
288 _mm256_slli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
289 _mm256_srli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), 32-R)));
// XORs the eight words at const_v into cv_l in place; const_v is only read.
292inline void xor_with_const(lsh_u32 cv_l[8],
const lsh_u32 const_v[8])
294 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
295 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
296 _mm256_loadu_si256(CONST_M256_CAST(const_v))));
// Rearranges the bytes of cv_r in place with a fixed 32-entry byte-index pattern
// (16 indices per 128-bit lane) passed to _mm256_shuffle_epi8.
// NOTE(review): listing line 304 is missing; it presumably holds the call that builds the
// control vector from the 32 indices below — confirm against the original file.
299inline void rotate_msg_gamma(lsh_u32 cv_r[8])
302 _mm256_storeu_si256(M256_CAST(cv_r+0),
303 _mm256_shuffle_epi8(_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
305 15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1,
306 12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
// Permutes the 16 chaining-value words across cv_l and cv_r in place.
// Statement order matters: each store below is read back by a later load.
309inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
// Shuffle the words of cv_l within each 128-bit lane; kept in temp because cv_l is
// overwritten before the last step needs it.
311 __m256i temp = _mm256_shuffle_epi32(
312 _mm256_loadu_si256(CONST_M256_CAST(cv_l)), _MM_SHUFFLE(3,1,0,2));
// Shuffle the words of cv_r within each 128-bit lane and write the result back.
313 _mm256_storeu_si256(M256_CAST(cv_r),
314 _mm256_shuffle_epi32(
315 _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(1,2,3,0)));
// New cv_l is built from 128-bit halves of temp and of the already-shuffled cv_r.
316 _mm256_storeu_si256(M256_CAST(cv_l),
317 _mm256_permute2x128_si256(temp,
318 _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,3,0,1)));
// New cv_r is built from the other 128-bit halves of temp and the shuffled cv_r.
319 _mm256_storeu_si256(M256_CAST(cv_r),
320 _mm256_permute2x128_si256(temp,
321 _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,2,0,0)));
// One mixing step on the chaining value, parameterised by the rotation amounts Alpha
// (applied to cv_l) and Beta (applied to cv_r), with the step constants const_v XORed into cv_l.
// NOTE(review): listing numbering jumps 329 -> 332, 333 -> 335 and 335 -> 337, so statements
// between the visible calls are missing. Do not treat the visible sequence as the complete step.
328template <
unsigned int Alpha,
unsigned int Beta>
329inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8],
const lsh_u32 const_v[8])
332 rotate_blk<Alpha>(cv_l);
333 xor_with_const(cv_l, const_v);
335 rotate_blk<Beta>(cv_r);
337 rotate_msg_gamma(cv_r);
// Compresses one message block into the chaining value held in ctx (cv_l / cv_r).
// The sub-message buffers are addressed through an LSH256_AVX2_Internal built from ctx->cv_l.
// NOTE(review): brace lines and some statements near the top of the body are missing from
// this listing.
344inline void compress(LSH256_AVX2_Context* ctx,
const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
348 LSH256_AVX2_Internal s_state(ctx->cv_l);
349 LSH256_AVX2_Internal* i_state = &s_state;
351 const lsh_u32* const_v = NULL;
352 lsh_u32* cv_l = ctx->cv_l;
353 lsh_u32* cv_r = ctx->cv_r;
355 load_msg_blk(i_state, pdMsgBlk);
// Steps 0 and 1 use the freshly loaded sub-messages, so no expansion precedes them.
357 msg_add_even(cv_l, cv_r, i_state);
358 load_sc(&const_v, 0);
359 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
360 word_perm(cv_l, cv_r);
362 msg_add_odd(cv_l, cv_r, i_state);
363 load_sc(&const_v, 8);
364 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
365 word_perm(cv_l, cv_r);
// Remaining even/odd step pairs: expand, add message, mix with that step's constants, permute.
// Each pair consumes 16 step-constant words (8 for the even step, 8 for the odd step).
367 for (
size_t i = 1; i < NUM_STEPS / 2; i++)
369 msg_exp_even(i_state);
370 msg_add_even(cv_l, cv_r, i_state);
371 load_sc(&const_v, 16 * i);
372 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
373 word_perm(cv_l, cv_r);
375 msg_exp_odd(i_state);
376 msg_add_odd(cv_l, cv_r, i_state);
377 load_sc(&const_v, 16 * i + 8);
378 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
379 word_perm(cv_l, cv_r);
// Final expansion and message addition; no mix or permutation follows in the visible code.
382 msg_exp_even(i_state);
383 msg_add_even(cv_l, cv_r, i_state);
// Body fragment: copies 16 words from iv into cv_l (words 0..7) and cv_r (words 8..15).
// NOTE(review): the function header is missing from this listing; init224/init256 call a
// load_iv(cv_l, cv_r, iv) that presumably owns these lines — confirm.
// The source side uses the aligned load _mm256_load_si256, so iv must be 32-byte aligned —
// verify that the IV tables are defined with that alignment.
391 _mm256_storeu_si256(M256_CAST(cv_l+0),
392 _mm256_load_si256(CONST_M256_CAST(iv+0)));
393 _mm256_storeu_si256(M256_CAST(cv_r+0),
394 _mm256_load_si256(CONST_M256_CAST(iv+8)));
// Sets all eight words of cv_l and all eight words of cv_r to zero.
397inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
399 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
400 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
// Zeroes the 32 words starting at ctx->sub_msgs with four 256-bit stores.
403inline void zero_submsgs(LSH256_AVX2_Context* ctx)
405 lsh_u32* sub_msgs = ctx->sub_msgs;
407 _mm256_storeu_si256(M256_CAST(sub_msgs+ 0), _mm256_setzero_si256());
408 _mm256_storeu_si256(M256_CAST(sub_msgs+ 8), _mm256_setzero_si256());
409 _mm256_storeu_si256(M256_CAST(sub_msgs+16), _mm256_setzero_si256());
410 _mm256_storeu_si256(M256_CAST(sub_msgs+24), _mm256_setzero_si256());
// Loads the LSH256_IV224 table into the context's chaining value.
// NOTE(review): listing numbering jumps 413 -> 418; statements before the load are missing.
413inline void init224(LSH256_AVX2_Context* ctx)
418 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
// Loads the LSH256_IV256 table into the context's chaining value.
// NOTE(review): listing numbering jumps 421 -> 426; statements before the load are missing.
421inline void init256(LSH256_AVX2_Context* ctx)
426 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
// Folds the chaining value: cv_l ^= cv_r over eight words. cv_r is left unchanged.
// NOTE(review): listing numbering jumps 431 -> 435; the opening lines of the body are missing.
431inline void fin(LSH256_AVX2_Context* ctx)
435 _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
436 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
437 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
// Copies the digest out of ctx->cv_l into pbHashVal.
// The byte count and the optional trailing-bit count are both derived from ctx->alg_type;
// pbHashVal must have room for LSH_GET_HASHBYTE(alg_type) bytes.
// NOTE(review): listing numbering jumps 442 -> 448; the opening lines of the body are missing.
442inline void get_hash(LSH256_AVX2_Context* ctx, lsh_u8* pbHashVal)
448 lsh_uint alg_type = ctx->alg_type;
449 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
450 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
453 memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
// When a small-bit count is present, clear the low hash_val_bit_len bits of the last byte.
454 if (hash_val_bit_len){
455 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
// Resets the context for a new hash: clears the buffered-bit count and sets up the chaining
// value according to ctx->alg_type.
// NOTE(review): the switch statement, the case bodies and the return statements are missing
// from this listing. The two case labels presumably dispatch to init256/init224 and return,
// with the code below as the path for other type codes — confirm against the original file.
461lsh_err lsh256_init_avx2(LSH256_AVX2_Context* ctx)
466 lsh_u32 alg_type = ctx->alg_type;
467 const lsh_u32* const_v = NULL;
468 ctx->remain_databitlen = 0;
475 case LSH_TYPE_256_256:
478 case LSH_TYPE_256_224:
485 lsh_u32* cv_l = ctx->cv_l;
486 lsh_u32* cv_r = ctx->cv_r;
// Seed the chaining value with the maximum digest byte length and the requested bit length.
489 cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
490 cv_l[1] = LSH_GET_HASHBIT(alg_type);
// Run every step pair (mix + word permutation only, no message input) over the seeded value.
492 for (
size_t i = 0; i < NUM_STEPS / 2; i++)
495 load_sc(&const_v, i * 16);
496 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
497 word_perm(cv_l, cv_r);
499 load_sc(&const_v, i * 16 + 8);
500 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
501 word_perm(cv_l, cv_r);
// Absorbs databitlen bits from data: buffers partial blocks in ctx->last_block and compresses
// each full block. Returns LSH_ERR_INVALID_STATE when the buffered byte count is already a
// full block or more.
// pos2 and remain_msg_bit are constant 0 here, so only whole bytes are handled and the
// partial-bit statements below have no effect.
// NOTE(review): brace lines, several return statements and some conditions are missing from
// this listing; see the individual notes below.
507lsh_err lsh256_update_avx2(LSH256_AVX2_Context* ctx,
const lsh_u8* data,
size_t databitlen)
517 if (databitlen == 0){
522 size_t databytelen = databitlen >> 3;
524 const size_t pos2 = 0;
526 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
528 const size_t remain_msg_bit = 0;
530 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
531 return LSH_ERR_INVALID_STATE;
533 if (remain_msg_bit > 0){
534 return LSH_ERR_INVALID_DATABITLEN;
// Not enough input to complete a block: append to the buffer and record the new length.
537 if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
539 memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
540 ctx->remain_databitlen += (lsh_uint)databitlen;
541 remain_msg_byte += (lsh_uint)databytelen;
// NOTE(review): listing line 542 is missing; this read of data[databytelen] is presumably
// guarded by a test on pos2 — confirm.
543 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
// Complete the buffered partial block with fresh input and compress it.
548 if (remain_msg_byte > 0){
549 size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
550 memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
551 compress(ctx, ctx->last_block);
// NOTE(review): listing line 552 is missing; the matching advance of `data` is not visible — confirm.
553 databytelen -= more_byte;
555 ctx->remain_databitlen = 0;
// Consume whole blocks from the input.
// NOTE(review): listing lines 559..563 are missing, so the call that processes each block
// is not visible.
558 while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
564 data += LSH256_MSG_BLK_BYTE_LEN;
565 databytelen -= LSH256_MSG_BLK_BYTE_LEN;
// Keep any tail bytes for the next call.
568 if (databytelen > 0){
569 memcpy(ctx->last_block, data, databytelen);
570 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
// NOTE(review): listing lines 571..573 are missing; presumably a pos2 guard — confirm.
574 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
575 ctx->remain_databitlen += pos2;
// Pads the buffered tail, compresses the last block and writes the digest to hashval.
// Returns LSH_ERR_INVALID_STATE when the buffered byte count is a full block or more.
// remain_msg_bit is constant 0 here.
// NOTE(review): brace lines and the statements at listing lines 598, 600..601, 607..608 and
// after 609 are missing; see the notes below.
581lsh_err lsh256_final_avx2(LSH256_AVX2_Context* ctx, lsh_u8* hashval)
590 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
592 const size_t remain_msg_bit = 0;
594 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
595 return LSH_ERR_INVALID_STATE;
// NOTE(review): the next two statements are presumably the two arms of an if/else on
// remain_msg_bit whose condition lines are missing — confirm.
599 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
// Padding marker: a single 0x80 byte directly after the buffered message bytes.
602 ctx->last_block[remain_msg_byte] = 0x80;
// Zero-fill the rest of the block after the marker byte.
604 memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
606 compress(ctx, ctx->last_block);
// NOTE(review): listing lines 607..608 are missing between compress and get_hash; a call to
// fin(ctx) may belong here — confirm against the original file.
609 get_hash(ctx, hashval);
614ANONYMOUS_NAMESPACE_END
// Entry point: restarts hashing on a caller-owned state array.
// Clears state[RemainingBits], wraps state in a context and runs lsh256_init_avx2.
// The algorithm type is read from state[AlgorithmType].
// NOTE(review): the statement executed when err != LSH_SUCCESS is missing from this listing.
619void LSH256_Base_Restart_AVX2(
word32* state)
621 state[RemainingBits] = 0;
622 LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
623 lsh_err err = lsh256_init_avx2(&ctx);
625 if (err != LSH_SUCCESS)
// Entry point: absorbs `size` bytes of `input` into the caller-owned state array.
// The length is handed to lsh256_update_avx2 in bits (8*size).
// NOTE(review): the statement executed when err != LSH_SUCCESS is missing from this listing.
630void LSH256_Base_Update_AVX2(
word32* state,
const byte *input,
size_t size)
632 LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
633 lsh_err err = lsh256_update_avx2(&ctx, input, 8*size);
635 if (err != LSH_SUCCESS)
// Entry point: finishes the hash and writes the digest to `hash`.
// The third (unnamed) size_t parameter is ignored; the number of bytes written is decided
// by the algorithm type stored in state[AlgorithmType].
// NOTE(review): the statement executed when err != LSH_SUCCESS is missing from this listing.
640void LSH256_Base_TruncatedFinal_AVX2(
word32* state,
byte *hash,
size_t)
642 LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
643 lsh_err err = lsh256_final_avx2(&ctx, hash);
645 if (err != LSH_SUCCESS)
Base class for all exceptions thrown by the library.
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Library configuration file.
unsigned char byte
8-bit unsigned datatype
unsigned int word32
32-bit unsigned datatype
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Crypto++ library namespace.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.