// File-name marker: a character array with external linkage, initialised from
// __FILE__ (the path of this translation unit as seen by the preprocessor).
// NOTE(review): presumably present so the object file always exports a symbol
// even when the SSSE3 code below is compiled out - confirm against the build.
20extern const char LSH256_SSE_FNAME[] = __FILE__;
22#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
24#if defined(CRYPTOPP_SSSE3_AVAILABLE)
25# include <emmintrin.h>
26# include <tmmintrin.h>
29#if defined(CRYPTOPP_XOP_AVAILABLE)
30# include <ammintrin.h>
34#if (CRYPTOPP_GCC_VERSION >= 40500)
35# include <x86intrin.h>
38ANONYMOUS_NAMESPACE_BEGIN
// Size in bytes of one message block. It dimensions the msgblk / pdMsgBlk array
// parameters and is the buffering threshold used by update() and final().
42const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// Value written into cv_l[0] on the generic path of lsh256_ssse3_init().
45const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;
// Word counts: each IV table holds CV_WORD_LEN words, and the step-constant
// table holds CONST_WORD_LEN words for each of NUM_STEPS steps.
48const unsigned int CV_WORD_LEN = 16;
49const unsigned int CONST_WORD_LEN = 8;
// Step count; the loops in compress() and init run up to NUM_STEPS / 2,
// handling one even and one odd step per iteration.
52const unsigned int NUM_STEPS = 26;
// Rotation amounts supplied as the Alpha / Beta template arguments of mix<>()
// for even and odd steps respectively.
54const unsigned int ROT_EVEN_ALPHA = 29;
55const unsigned int ROT_EVEN_BETA = 1;
56const unsigned int ROT_ODD_ALPHA = 5;
57const unsigned int ROT_ODD_BETA = 17;
// Algorithm-type codes matched by the switch in lsh256_ssse3_init()
// (0x20 == 32 and 0x1C == 28).
59const unsigned int LSH_TYPE_256_256 = 0x0000020;
60const unsigned int LSH_TYPE_256_224 = 0x000001C;
// Status codes returned by the lsh256_ssse3_* entry points.
67const unsigned int LSH_SUCCESS = 0x0;
70const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
71const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
// Indices into the caller's word32 state array; the exported wrappers read
// state[AlgorithmType] and state[RemainingBits].
75const unsigned int AlgorithmType = 80;
76const unsigned int RemainingBits = 81;
// Declarations only: the tables are defined in another translation unit.
// The two IV tables hold CV_WORD_LEN (16) words each; the step-constant table
// holds CONST_WORD_LEN * NUM_STEPS (8 * 26) words.
84extern const word32 LSH256_IV224[CV_WORD_LEN];
85extern const word32 LSH256_IV256[CV_WORD_LEN];
86extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
91ANONYMOUS_NAMESPACE_BEGIN
98using CryptoPP::GetBlock;
109using CryptoPP::LSH::LSH256_IV224;
110using CryptoPP::LSH::LSH256_IV256;
111using CryptoPP::LSH::LSH256_StepConstants;
// Non-owning view over the caller's flat word32 state array. The exported
// wrappers build one per call as
//   ctx(state, state[AlgorithmType], state[RemainingBits]).
// NOTE(review): the constructor's signature line and most member declarations
// are not visible in this listing.
113struct LSH256_SSSE3_Context
// Pointer members, as offsets in 32-bit words from `state`:
//   cv_l at +0, cv_r at +8, sub_msgs at +16, last_block at +48 (as a byte*).
// A 128-byte last_block starting at word 48 would end at word 79, which lines
// up with AlgorithmType == 80 - presumably intentional; confirm.
116 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
117 last_block(reinterpret_cast<
byte*>(state+48)),
118 remain_databitlen(remainingBitLength),
119 alg_type(static_cast<lsh_type>(algType)) {}
// Held by reference: assignments made through the context write straight into
// the variable it was bound to (state[RemainingBits] in the wrappers).
125 lsh_u32& remain_databitlen;
// Names the four 8-word sub-message buffers inside the state array.
// compress() constructs this from ctx->cv_l (i.e. state+0), so the offsets
// below cover words 16..47 - the same region that starts at Context::sub_msgs.
129struct LSH256_SSSE3_Internal
131 LSH256_SSSE3_Internal(
word32* state) :
// even-left at +16, even-right at +24, odd-left at +32, odd-right at +40.
132 submsg_e_l(state+16), submsg_e_r(state+24),
133 submsg_o_l(state+32), submsg_o_r(state+40) { }
// Returns true when bits 16..19 of val (mask 0xf0000) are all zero.
// NOTE(review): the name reads as an "is LSH-512" test while the condition
// checks for a zero field - confirm the intended polarity before relying on it.
145inline bool LSH_IS_LSH512(lsh_uint val) {
146 return (val & 0xf0000) == 0;
// Extracts a small bit count from an algorithm-type value. The body is not
// visible in this listing. Callers: LSH_GET_HASHBIT() subtracts the result
// from the bit length, and get_hash() uses it as a shift count for the final
// output byte.
149inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
// Extracts the digest length in bytes from an algorithm-type value. The body
// is not visible in this listing. get_hash() uses the result as its memcpy
// length, and LSH_GET_HASHBIT() multiplies it by 8.
153inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
// Digest length in bits: the byte count times eight, minus the small bit count
// reported by LSH_GET_SMALL_HASHBIT().
157inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
158 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
// Takes and returns a 32-bit word. The body is not visible in this listing;
// presumably converts a little-endian encoded word to host order - TODO confirm.
161inline lsh_u32 loadLE32(lsh_u32 v) {
// Body not visible in this listing; presumably rotates x left by r bits -
// TODO confirm. Unlike its neighbours this helper is not declared inline.
165lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
// Fills the four sub-message buffers of i_state for a new message block.
// Eight unaligned 128-bit stores are issued, two per buffer (words +0..3 and
// +4..7), in the order even-left, even-right, odd-left, odd-right.
// NOTE(review): the stored values are on lines missing from this listing;
// presumably they are loaded from consecutive 16-byte slices of msgblk.
170inline void load_msg_blk(LSH256_SSSE3_Internal* i_state,
const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
173 lsh_u32* submsg_e_l = i_state->submsg_e_l;
174 lsh_u32* submsg_e_r = i_state->submsg_e_r;
175 lsh_u32* submsg_o_l = i_state->submsg_o_l;
176 lsh_u32* submsg_o_r = i_state->submsg_o_r;
178 _mm_storeu_si128(
M128_CAST(submsg_e_l+0),
180 _mm_storeu_si128(
M128_CAST(submsg_e_l+4),
182 _mm_storeu_si128(
M128_CAST(submsg_e_r+0),
184 _mm_storeu_si128(
M128_CAST(submsg_e_r+4),
186 _mm_storeu_si128(
M128_CAST(submsg_o_l+0),
188 _mm_storeu_si128(
M128_CAST(submsg_o_l+4),
190 _mm_storeu_si128(
M128_CAST(submsg_o_r+0),
192 _mm_storeu_si128(
M128_CAST(submsg_o_r+4),
// Message expansion for an even step: rewrites the two even sub-message
// buffers in place. Each of the four stores writes a lane-wise 32-bit sum
// (_mm_add_epi32). The visible operand is the destination's current contents
// combined with an _MM_SHUFFLE immediate: (1,0,2,3) for the low four words of
// each buffer, (2,1,0,3) for the high four.
// NOTE(review): the other operand of each sum is on a line missing from this
// listing; the odd buffers are fetched below, so it presumably comes from them.
196inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
200 lsh_u32* submsg_e_l = i_state->submsg_e_l;
201 lsh_u32* submsg_e_r = i_state->submsg_e_r;
202 lsh_u32* submsg_o_l = i_state->submsg_o_l;
203 lsh_u32* submsg_o_r = i_state->submsg_o_r;
205 _mm_storeu_si128(
M128_CAST(submsg_e_l+0), _mm_add_epi32(
209 _mm_loadu_si128(
CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(1,0,2,3))));
211 _mm_storeu_si128(
M128_CAST(submsg_e_l+4), _mm_add_epi32(
215 _mm_loadu_si128(
CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(2,1,0,3))));
217 _mm_storeu_si128(
M128_CAST(submsg_e_r+0), _mm_add_epi32(
221 _mm_loadu_si128(
CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(1,0,2,3))));
223 _mm_storeu_si128(
M128_CAST(submsg_e_r+4), _mm_add_epi32(
227 _mm_loadu_si128(
CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(2,1,0,3))));
// Message expansion for an odd step: the counterpart of msg_exp_even() that
// rewrites the two odd sub-message buffers in place. Each store writes a
// lane-wise 32-bit sum (_mm_add_epi32). The visible operand is the
// destination's current contents combined with an _MM_SHUFFLE immediate:
// (1,0,2,3) for the low four words, (2,1,0,3) for the high four.
// NOTE(review): the other operand of each sum is on a line missing from this
// listing; the even buffers are fetched below, so it presumably comes from them.
230inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
234 lsh_u32* submsg_e_l = i_state->submsg_e_l;
235 lsh_u32* submsg_e_r = i_state->submsg_e_r;
236 lsh_u32* submsg_o_l = i_state->submsg_o_l;
237 lsh_u32* submsg_o_r = i_state->submsg_o_r;
239 _mm_storeu_si128(
M128_CAST(submsg_o_l+0), _mm_add_epi32(
243 _mm_loadu_si128(
CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(1,0,2,3))));
245 _mm_storeu_si128(
M128_CAST(submsg_o_l+4), _mm_add_epi32(
249 _mm_loadu_si128(
CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(2,1,0,3))));
251 _mm_storeu_si128(
M128_CAST(submsg_o_r+0), _mm_add_epi32(
255 _mm_loadu_si128(
CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(1,0,2,3))));
257 _mm_storeu_si128(
M128_CAST(submsg_o_r+4), _mm_add_epi32(
261 _mm_loadu_si128(
CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(2,1,0,3))));
// Points *p_const_v at element i of LSH256_StepConstants. Nothing is copied.
// Callers in this file pass multiples of 8 for i and hand the resulting
// pointer to mix<>(), which takes it as an 8-word array.
264inline void load_sc(
const lsh_u32** p_const_v,
size_t i)
268 *p_const_v = &LSH256_StepConstants[i];
// Even-step message addition: overwrites all sixteen chaining words (cv_l and
// cv_r, four words per store) with the result of a 128-bit XOR.
// NOTE(review): the XOR operands are on lines missing from this listing;
// presumably each chaining-value quarter is XORed with the matching quarter of
// the even sub-message buffers fetched below.
271inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
275 lsh_u32* submsg_e_l = i_state->submsg_e_l;
276 lsh_u32* submsg_e_r = i_state->submsg_e_r;
278 _mm_storeu_si128(
M128_CAST(cv_l+0), _mm_xor_si128(
281 _mm_storeu_si128(
M128_CAST(cv_l+4), _mm_xor_si128(
284 _mm_storeu_si128(
M128_CAST(cv_r+0), _mm_xor_si128(
287 _mm_storeu_si128(
M128_CAST(cv_r+4), _mm_xor_si128(
// Odd-step message addition: overwrites all sixteen chaining words (cv_l and
// cv_r, four words per store) with the result of a 128-bit XOR.
// NOTE(review): the XOR operands are on lines missing from this listing;
// presumably each chaining-value quarter is XORed with the matching quarter of
// the odd sub-message buffers fetched below.
292inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
296 lsh_u32* submsg_o_l = i_state->submsg_o_l;
297 lsh_u32* submsg_o_r = i_state->submsg_o_r;
299 _mm_storeu_si128(
M128_CAST(cv_l), _mm_xor_si128(
302 _mm_storeu_si128(
M128_CAST(cv_l+4), _mm_xor_si128(
305 _mm_storeu_si128(
M128_CAST(cv_r), _mm_xor_si128(
308 _mm_storeu_si128(
M128_CAST(cv_r+4), _mm_xor_si128(
// Overwrites the eight words of cv_l with a lane-wise 32-bit sum
// (_mm_add_epi32, which wraps modulo 2^32 per lane). cv_r is const here, so
// only cv_l is modified.
// NOTE(review): the addends are on lines missing from this listing;
// presumably cv_l[i] += cv_r[i] - confirm.
313inline void add_blk(lsh_u32 cv_l[8],
const lsh_u32 cv_r[8])
315 _mm_storeu_si128(
M128_CAST(cv_l), _mm_add_epi32(
318 _mm_storeu_si128(
M128_CAST(cv_l+4), _mm_add_epi32(
// Rewrites the eight words at cv in place; R is a compile-time amount
// (mix<>() instantiates this with its Alpha and Beta rotation constants).
323template <
unsigned int R>
324inline void rotate_blk(lsh_u32 cv[8])
// Builds with XOP available take a separate path whose body is not visible.
326#if defined(CRYPTOPP_XOP_AVAILABLE)
// Visible path: each four-word half is written back as the OR of two values.
// NOTE(review): the OR operands are not visible; presumably a left shift by R
// and a right shift by 32-R, which together form a rotate - confirm.
332 _mm_storeu_si128(
M128_CAST(cv), _mm_or_si128(
335 _mm_storeu_si128(
M128_CAST(cv+4), _mm_or_si128(
// Overwrites the eight words at cv_l with the result of a 128-bit XOR, four
// words per store. const_v is read-only.
// NOTE(review): the XOR operands are on lines missing from this listing;
// presumably cv_l[i] ^= const_v[i] - confirm.
341inline void xor_with_const(lsh_u32* cv_l,
const lsh_u32* const_v)
343 _mm_storeu_si128(
M128_CAST(cv_l), _mm_xor_si128(
346 _mm_storeu_si128(
M128_CAST(cv_l+4), _mm_xor_si128(
// Applies a per-word byte rearrangement to the eight words of cv_r, using one
// 16-byte index table per four-word half. _mm_set_epi8 lists its arguments
// from the most significant byte down to the least significant.
// NOTE(review): the call that consumes these tables is not visible in this
// listing; presumably _mm_shuffle_epi8, which would make each table a
// byte-granular rotation that differs from word to word - confirm.
351inline void rotate_msg_gamma(lsh_u32 cv_r[8])
356 _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
359 _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1)));
// Permutes the sixteen chaining words. Visible steps: each four-word quarter
// of cv_l and cv_r is replaced by a _mm_shuffle_epi32 result (a reorder of
// 32-bit lanes), and afterwards the low quarter of cv_r is overwritten from a
// local named temp.
// NOTE(review): the shuffle sources and immediates, and the statements that
// define and use temp, are on lines missing from this listing; the final store
// looks like the tail of a quarter-swap through a temporary - confirm.
362inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
364 _mm_storeu_si128(
M128_CAST(cv_l+0), _mm_shuffle_epi32(
366 _mm_storeu_si128(
M128_CAST(cv_l+4), _mm_shuffle_epi32(
368 _mm_storeu_si128(
M128_CAST(cv_r+0), _mm_shuffle_epi32(
370 _mm_storeu_si128(
M128_CAST(cv_r+4), _mm_shuffle_epi32(
380 _mm_storeu_si128(
M128_CAST(cv_r+0), temp);
// One mixing step over the chaining value, parameterised by the two rotation
// amounts. Visible order of operations:
//   rotate cv_l by Alpha, XOR const_v into cv_l,
//   rotate cv_r by Beta, then byte-rearrange cv_r (rotate_msg_gamma).
// NOTE(review): statements between the visible calls are missing from this
// listing; presumably add_blk() calls that combine the two halves - confirm.
387template <
unsigned int Alpha,
unsigned int Beta>
388inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8],
const lsh_u32 const_v[8])
391 rotate_blk<Alpha>(cv_l);
392 xor_with_const(cv_l, const_v);
394 rotate_blk<Beta>(cv_r);
396 rotate_msg_gamma(cv_r);
// Absorbs one message block into the chaining value held by ctx.
//   ctx      - context whose cv_l / cv_r (and sub-message area) are updated.
//   pdMsgBlk - the LSH256_MSG_BLK_BYTE_LEN-byte block to absorb.
403inline void compress(LSH256_SSSE3_Context* ctx,
const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
// Sub-message view built from the start of the state (ctx->cv_l is state+0).
407 LSH256_SSSE3_Internal s_state(ctx->cv_l);
408 LSH256_SSSE3_Internal* i_state = &s_state;
410 const lsh_u32* const_v = NULL;
411 lsh_u32* cv_l = ctx->cv_l;
412 lsh_u32* cv_r = ctx->cv_r;
414 load_msg_blk(i_state, pdMsgBlk);
// Steps 0 and 1 use the freshly loaded sub-messages directly (no expansion),
// with the step constants at offsets 0 and 8.
416 msg_add_even(cv_l, cv_r, i_state);
417 load_sc(&const_v, 0);
418 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
419 word_perm(cv_l, cv_r);
421 msg_add_odd(cv_l, cv_r, i_state);
422 load_sc(&const_v, 8);
423 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
424 word_perm(cv_l, cv_r);
// Remaining step pairs: expand, add, mix, permute. Each pair consumes
// sixteen step-constant words (8 for the even step, 8 for the odd step).
426 for (
size_t i = 1; i < NUM_STEPS / 2; i++)
428 msg_exp_even(i_state);
429 msg_add_even(cv_l, cv_r, i_state);
430 load_sc(&const_v, 16 * i);
431 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
432 word_perm(cv_l, cv_r);
434 msg_exp_odd(i_state);
435 msg_add_odd(cv_l, cv_r, i_state);
436 load_sc(&const_v, 16 * i + 8);
437 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
438 word_perm(cv_l, cv_r);
// After the loop: one more even expansion and message addition, with no
// mix or permutation following it.
441 msg_exp_even(i_state);
442 msg_add_even(cv_l, cv_r, i_state);
// Initialises the chaining value from a 16-word IV table; init224() and
// init256() pass LSH256_IV224 and LSH256_IV256.
// NOTE(review): the body is not visible in this listing; presumably the first
// eight IV words go to cv_l and the last eight to cv_r - confirm.
447inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8],
const lsh_u32 iv[16])
// Clears the chaining value: four unaligned 128-bit stores of zero cover all
// eight words of cv_l and all eight words of cv_r.
459inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
461 _mm_storeu_si128(
M128_CAST(cv_l+0), _mm_setzero_si128());
462 _mm_storeu_si128(
M128_CAST(cv_l+4), _mm_setzero_si128());
463 _mm_storeu_si128(
M128_CAST(cv_r+0), _mm_setzero_si128());
464 _mm_storeu_si128(
M128_CAST(cv_r+4), _mm_setzero_si128());
// Clears the sub-message area of ctx: eight unaligned 128-bit stores of zero
// cover words 0..31 of ctx->sub_msgs, i.e. all four 8-word buffers.
467inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
469 lsh_u32* sub_msgs = ctx->sub_msgs;
471 _mm_storeu_si128(
M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
472 _mm_storeu_si128(
M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
473 _mm_storeu_si128(
M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
474 _mm_storeu_si128(
M128_CAST(sub_msgs+12), _mm_setzero_si128());
475 _mm_storeu_si128(
M128_CAST(sub_msgs+16), _mm_setzero_si128());
476 _mm_storeu_si128(
M128_CAST(sub_msgs+20), _mm_setzero_si128());
477 _mm_storeu_si128(
M128_CAST(sub_msgs+24), _mm_setzero_si128());
478 _mm_storeu_si128(
M128_CAST(sub_msgs+28), _mm_setzero_si128());
// Sets up ctx with the precomputed LSH256_IV224 table as the initial chaining
// value. NOTE(review): statements before the load_iv() call are missing from
// this listing.
481inline void init224(LSH256_SSSE3_Context* ctx)
486 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
// Sets up ctx with the precomputed LSH256_IV256 table as the initial chaining
// value. NOTE(review): statements before the load_iv() call are missing from
// this listing.
489inline void init256(LSH256_SSSE3_Context* ctx)
494 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
// Finalisation fold: overwrites the eight words of ctx->cv_l with the result
// of a 128-bit XOR, four words per store. get_hash() later copies the digest
// out of cv_l.
// NOTE(review): the XOR operands are on lines missing from this listing;
// presumably cv_l[i] ^= cv_r[i] - confirm.
499inline void fin(LSH256_SSSE3_Context* ctx)
503 _mm_storeu_si128(
M128_CAST(ctx->cv_l+0), _mm_xor_si128(
506 _mm_storeu_si128(
M128_CAST(ctx->cv_l+4), _mm_xor_si128(
// Copies the digest out of the context.
//   ctx       - context whose cv_l holds the digest words and whose alg_type
//               encodes the output length.
//   pbHashVal - destination; must have room for LSH_GET_HASHBYTE(alg_type)
//               bytes.
513inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
519 lsh_uint alg_type = ctx->alg_type;
520 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
521 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
// The raw bytes of cv_l are copied as they sit in memory.
524 memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
// For a digest length that is not a whole number of bytes, clear the low
// hash_val_bit_len bits of the final output byte.
525 if (hash_val_bit_len){
526 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
// Resets ctx for a new message according to ctx->alg_type.
// Returns an lsh_err status code (the return statements are not visible here).
532lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
537 lsh_u32 alg_type = ctx->alg_type;
538 const lsh_u32* const_v = NULL;
// No buffered input yet; this writes through the reference member.
539 ctx->remain_databitlen = 0;
// The two standard digest sizes are special-cased.
// NOTE(review): the case bodies are not visible; presumably they call
// init256() / init224() and return early - confirm.
543 case LSH_TYPE_256_256:
546 case LSH_TYPE_256_224:
// Generic path: derive the initial chaining value by computation.
553 lsh_u32* cv_l = ctx->cv_l;
554 lsh_u32* cv_r = ctx->cv_r;
// Seed the first two words with the maximum digest byte length (32) and
// the requested digest length in bits.
557 cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
558 cv_l[1] = LSH_GET_HASHBIT(alg_type);
// Run every step pair of the mix / permute schedule with no message
// addition; constants advance by 16 words per pair.
560 for (
size_t i = 0; i < NUM_STEPS / 2; i++)
563 load_sc(&const_v, i * 16);
564 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
565 word_perm(cv_l, cv_r);
567 load_sc(&const_v, i * 16 + 8);
568 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
569 word_perm(cv_l, cv_r);
// Absorbs more input into ctx.
//   ctx        - hashing context; last_block buffers a partial block and
//                remain_databitlen records how many bits it holds.
//   data       - input bytes.
//   databitlen - input length in bits (the wrapper in this file passes 8*size).
// Returns LSH_ERR_INVALID_STATE or LSH_ERR_INVALID_DATABITLEN on the checks
// below; other return statements are not visible in this listing.
575lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx,
const lsh_u8* data,
size_t databitlen)
582 if (databitlen == 0){
587 size_t databytelen = databitlen >> 3;
// Bit-granular input is disabled here: both sub-byte counts are fixed at 0.
589 const size_t pos2 = 0;
591 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
593 const size_t remain_msg_bit = 0;
// A full or over-full buffer means the context is corrupt.
595 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
596 return LSH_ERR_INVALID_STATE;
// Cannot fire while remain_msg_bit is the constant 0.
598 if (remain_msg_bit > 0){
599 return LSH_ERR_INVALID_DATABITLEN;
// Case 1: buffered bytes plus new input still do not fill a block, so the
// input is appended to the buffer.
602 if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
604 memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
605 ctx->remain_databitlen += (lsh_uint)databitlen;
606 remain_msg_byte += (lsh_uint)databytelen;
// NOTE(review): the guard around this store is not visible (presumably
// `if (pos2)`); with pos2 == 0 the mask (0xff >> 0) ^ 0xff evaluates to 0.
608 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
// Case 2: top up the partially filled buffer to a whole block and compress it.
// NOTE(review): the matching advance of `data` is not visible in this listing.
613 if (remain_msg_byte > 0){
614 size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
615 memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
616 compress(ctx, ctx->last_block);
618 databytelen -= more_byte;
620 ctx->remain_databitlen = 0;
// Consume whole blocks from the input.
// NOTE(review): the per-block compress call is not visible in this listing.
623 while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
629 data += LSH256_MSG_BLK_BYTE_LEN;
630 databytelen -= LSH256_MSG_BLK_BYTE_LEN;
// Stash the leftover tail (shorter than one block) for the next call.
633 if (databytelen > 0){
634 memcpy(ctx->last_block, data, databytelen);
635 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
// Sub-byte tail handling; adds 0 bits while pos2 is fixed at 0.
639 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
640 ctx->remain_databitlen += pos2;
// Pads the buffered tail, compresses the final block and writes the digest.
//   ctx     - hashing context holding the buffered tail in last_block.
//   hashval - destination for the digest (length is derived from ctx->alg_type
//             inside get_hash()).
// Returns LSH_ERR_INVALID_STATE if the buffer already holds a full block;
// other return statements are not visible in this listing.
646lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
652 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
654 const size_t remain_msg_bit = 0;
656 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
657 return LSH_ERR_INVALID_STATE;
// Padding marker: a single 1 bit directly after the message.
// NOTE(review): the two stores below look like the arms of an if/else on
// remain_msg_bit whose condition lines are not visible; with remain_msg_bit
// fixed at 0, the whole-byte 0x80 form would be the one that runs.
661 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
664 ctx->last_block[remain_msg_byte] = 0x80;
// Zero-fill the rest of the block after the marker byte.
666 memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
668 compress(ctx, ctx->last_block);
// NOTE(review): a statement between compress() and get_hash() is missing from
// this listing; presumably the fin() fold - confirm.
671 get_hash(ctx, hashval);
676ANONYMOUS_NAMESPACE_END
// Exported entry point: re-initialises the hash state held in `state`.
//   state - flat word32 array; state[AlgorithmType] selects the variant and
//           state[RemainingBits] is the buffered-bit counter.
681void LSH256_Base_Restart_SSSE3(
word32* state)
// Drop any buffered input before the context is built over the array.
683 state[RemainingBits] = 0;
684 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
685 lsh_err err = lsh256_ssse3_init(&ctx);
// NOTE(review): the failure action is not visible in this listing; presumably
// an exception is thrown - confirm.
687 if (err != LSH_SUCCESS)
// Exported entry point: feeds `size` bytes from `input` into the hash state.
//   state - flat word32 array holding the hash state.
//   input - bytes to absorb.
//   size  - number of bytes; converted to a bit count (8*size) for the
//           bit-oriented core routine.
692void LSH256_Base_Update_SSSE3(
word32* state,
const byte *input,
size_t size)
694 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
695 lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);
// NOTE(review): the failure action is not visible in this listing; presumably
// an exception is thrown - confirm.
697 if (err != LSH_SUCCESS)
// Exported entry point: finishes the hash and writes the digest to `hash`.
//   state - flat word32 array holding the hash state.
//   hash  - destination buffer for the digest.
// The third (size_t) parameter is unnamed and unused here; the number of bytes
// written is determined by state[AlgorithmType] inside the core routine.
702void LSH256_Base_TruncatedFinal_SSSE3(
word32* state,
byte *hash,
size_t)
704 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
705 lsh_err err = lsh256_ssse3_final(&ctx, hash);
// NOTE(review): the failure action is not visible in this listing; presumably
// an exception is thrown - confirm.
707 if (err != LSH_SUCCESS)
#define M128_CAST(x)
Clang workaround.
#define CONST_M128_CAST(x)
Clang workaround.
Base class for all exceptions thrown by the library.
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Library configuration file.
unsigned char byte
8-bit unsigned datatype
unsigned int word32
32-bit unsigned datatype
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Crypto++ library namespace.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.