Crypto++ 8.7
Free C++ class library of cryptographic schemes
lsh256_sse.cpp
// lsh256_sse.cpp - written and placed in the public domain by Jeffrey Walton
2// Based on the specification and source code provided by
3// Korea Internet & Security Agency (KISA) website. Also
4// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "cpu.h"
17#include "misc.h"
18
19// Squash MS LNK4221 and libtool warnings
20extern const char LSH256_SSE_FNAME[] = __FILE__;
21
22#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23
24#if defined(CRYPTOPP_SSSE3_AVAILABLE)
25# include <emmintrin.h>
26# include <tmmintrin.h>
27#endif
28
29#if defined(CRYPTOPP_XOP_AVAILABLE)
30# include <ammintrin.h>
31#endif
32
33// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
34#if (CRYPTOPP_GCC_VERSION >= 40500)
35# include <x86intrin.h>
36#endif
37
38ANONYMOUS_NAMESPACE_BEGIN
39
40/* LSH Constants */
41
42const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
43// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
44// const unsigned int LSH256_CV_BYTE_LEN = 64;
45const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;
46
47// const unsigned int MSG_BLK_WORD_LEN = 32;
48const unsigned int CV_WORD_LEN = 16;
49const unsigned int CONST_WORD_LEN = 8;
50// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
51// const unsigned int WORD_BIT_LEN = 32;
52const unsigned int NUM_STEPS = 26;
53
54const unsigned int ROT_EVEN_ALPHA = 29;
55const unsigned int ROT_EVEN_BETA = 1;
56const unsigned int ROT_ODD_ALPHA = 5;
57const unsigned int ROT_ODD_BETA = 17;
58
59const unsigned int LSH_TYPE_256_256 = 0x0000020;
60const unsigned int LSH_TYPE_256_224 = 0x000001C;
61
62// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
63// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;
64
65/* Error Code */
66
67const unsigned int LSH_SUCCESS = 0x0;
68// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
69// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
70const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
71const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
72
73/* Index into our state array */
74
75const unsigned int AlgorithmType = 80;
76const unsigned int RemainingBits = 81;
77
78NAMESPACE_END
79
80NAMESPACE_BEGIN(CryptoPP)
81NAMESPACE_BEGIN(LSH)
82
83// lsh256.cpp
84extern const word32 LSH256_IV224[CV_WORD_LEN];
85extern const word32 LSH256_IV256[CV_WORD_LEN];
86extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
87
88NAMESPACE_END // LSH
89NAMESPACE_END // Crypto++
90
91ANONYMOUS_NAMESPACE_BEGIN
92
93using CryptoPP::byte;
97
98using CryptoPP::GetBlock;
102
103typedef byte lsh_u8;
104typedef word32 lsh_u32;
105typedef word32 lsh_uint;
106typedef word32 lsh_err;
107typedef word32 lsh_type;
108
109using CryptoPP::LSH::LSH256_IV224;
110using CryptoPP::LSH::LSH256_IV256;
111using CryptoPP::LSH::LSH256_StepConstants;
112
113struct LSH256_SSSE3_Context
114{
115 LSH256_SSSE3_Context(word32* state, word32 algType, word32& remainingBitLength) :
116 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
117 last_block(reinterpret_cast<byte*>(state+48)),
118 remain_databitlen(remainingBitLength),
119 alg_type(static_cast<lsh_type>(algType)) {}
120
121 lsh_u32* cv_l; // start of our state block
122 lsh_u32* cv_r;
123 lsh_u32* sub_msgs;
124 lsh_u8* last_block;
125 lsh_u32& remain_databitlen;
126 lsh_type alg_type;
127};
128
129struct LSH256_SSSE3_Internal
130{
131 LSH256_SSSE3_Internal(word32* state) :
132 submsg_e_l(state+16), submsg_e_r(state+24),
133 submsg_o_l(state+32), submsg_o_r(state+40) { }
134
135 lsh_u32* submsg_e_l; /* even left sub-message */
136 lsh_u32* submsg_e_r; /* even right sub-message */
137 lsh_u32* submsg_o_l; /* odd left sub-message */
138 lsh_u32* submsg_o_r; /* odd right sub-message */
139};
140
141// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
142
143/* LSH AlgType Macro */
144
145inline bool LSH_IS_LSH512(lsh_uint val) {
146 return (val & 0xf0000) == 0;
147}
148
149inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
150 return val >> 24;
151}
152
153inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
154 return val & 0xffff;
155}
156
157inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
158 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
159}
160
161inline lsh_u32 loadLE32(lsh_u32 v) {
163}
164
165lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
166 return rotlFixed(x, r);
167}
168
169// Original code relied upon unaligned lsh_u32 buffer
170inline void load_msg_blk(LSH256_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
171{
172 CRYPTOPP_ASSERT(i_state != NULLPTR);
173 lsh_u32* submsg_e_l = i_state->submsg_e_l;
174 lsh_u32* submsg_e_r = i_state->submsg_e_r;
175 lsh_u32* submsg_o_l = i_state->submsg_o_l;
176 lsh_u32* submsg_o_r = i_state->submsg_o_r;
177
178 _mm_storeu_si128(M128_CAST(submsg_e_l+0),
179 _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
180 _mm_storeu_si128(M128_CAST(submsg_e_l+4),
181 _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
182 _mm_storeu_si128(M128_CAST(submsg_e_r+0),
183 _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
184 _mm_storeu_si128(M128_CAST(submsg_e_r+4),
185 _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
186 _mm_storeu_si128(M128_CAST(submsg_o_l+0),
187 _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
188 _mm_storeu_si128(M128_CAST(submsg_o_l+4),
189 _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
190 _mm_storeu_si128(M128_CAST(submsg_o_r+0),
191 _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
192 _mm_storeu_si128(M128_CAST(submsg_o_r+4),
193 _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
194}
195
196inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
197{
198 CRYPTOPP_ASSERT(i_state != NULLPTR);
199
200 lsh_u32* submsg_e_l = i_state->submsg_e_l;
201 lsh_u32* submsg_e_r = i_state->submsg_e_r;
202 lsh_u32* submsg_o_l = i_state->submsg_o_l;
203 lsh_u32* submsg_o_r = i_state->submsg_o_r;
204
205 _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi32(
206 _mm_shuffle_epi32(
207 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(3,2,1,0)),
208 _mm_shuffle_epi32(
209 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(1,0,2,3))));
210
211 _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi32(
212 _mm_shuffle_epi32(
213 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(3,2,1,0)),
214 _mm_shuffle_epi32(
215 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(2,1,0,3))));
216
217 _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi32(
218 _mm_shuffle_epi32(
219 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(3,2,1,0)),
220 _mm_shuffle_epi32(
221 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(1,0,2,3))));
222
223 _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi32(
224 _mm_shuffle_epi32(
225 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(3,2,1,0)),
226 _mm_shuffle_epi32(
227 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(2,1,0,3))));
228}
229
230inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
231{
232 CRYPTOPP_ASSERT(i_state != NULLPTR);
233
234 lsh_u32* submsg_e_l = i_state->submsg_e_l;
235 lsh_u32* submsg_e_r = i_state->submsg_e_r;
236 lsh_u32* submsg_o_l = i_state->submsg_o_l;
237 lsh_u32* submsg_o_r = i_state->submsg_o_r;
238
239 _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi32(
240 _mm_shuffle_epi32(
241 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(3,2,1,0)),
242 _mm_shuffle_epi32(
243 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(1,0,2,3))));
244
245 _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi32(
246 _mm_shuffle_epi32(
247 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(3,2,1,0)),
248 _mm_shuffle_epi32(
249 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(2,1,0,3))));
250
251 _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi32(
252 _mm_shuffle_epi32(
253 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(3,2,1,0)),
254 _mm_shuffle_epi32(
255 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(1,0,2,3))));
256
257 _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi32(
258 _mm_shuffle_epi32(
259 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(3,2,1,0)),
260 _mm_shuffle_epi32(
261 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(2,1,0,3))));
262}
263
264inline void load_sc(const lsh_u32** p_const_v, size_t i)
265{
266 CRYPTOPP_ASSERT(p_const_v != NULLPTR);
267
268 *p_const_v = &LSH256_StepConstants[i];
269}
270
271inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
272{
273 CRYPTOPP_ASSERT(i_state != NULLPTR);
274
275 lsh_u32* submsg_e_l = i_state->submsg_e_l;
276 lsh_u32* submsg_e_r = i_state->submsg_e_r;
277
278 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_xor_si128(
279 _mm_loadu_si128(CONST_M128_CAST(cv_l+0)),
280 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
281 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
282 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
283 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
284 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_xor_si128(
285 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
286 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
287 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
288 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
289 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
290}
291
292inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
293{
294 CRYPTOPP_ASSERT(i_state != NULLPTR);
295
296 lsh_u32* submsg_o_l = i_state->submsg_o_l;
297 lsh_u32* submsg_o_r = i_state->submsg_o_r;
298
299 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
300 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
301 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
302 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
303 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
304 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
305 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
306 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
307 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
308 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
309 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
310 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
311}
312
313inline void add_blk(lsh_u32 cv_l[8], const lsh_u32 cv_r[8])
314{
315 _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi32(
316 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
317 _mm_loadu_si128(CONST_M128_CAST(cv_r))));
318 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi32(
319 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
320 _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
321}
322
323template <unsigned int R>
324inline void rotate_blk(lsh_u32 cv[8])
325{
326#if defined(CRYPTOPP_XOP_AVAILABLE)
327 _mm_storeu_si128(M128_CAST(cv),
328 _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
329 _mm_storeu_si128(M128_CAST(cv+4),
330 _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
331#else
332 _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
333 _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
334 _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), 32-R)));
335 _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
336 _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
337 _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 32-R)));
338#endif
339}
340
341inline void xor_with_const(lsh_u32* cv_l, const lsh_u32* const_v)
342{
343 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
344 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
345 _mm_loadu_si128(CONST_M128_CAST(const_v))));
346 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
347 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
348 _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
349}
350
351inline void rotate_msg_gamma(lsh_u32 cv_r[8])
352{
353 // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
354 _mm_storeu_si128(M128_CAST(cv_r+0),
355 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
356 _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
357 _mm_storeu_si128(M128_CAST(cv_r+4),
358 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
359 _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1)));
360}
361
362inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
363{
364 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_shuffle_epi32(
365 _mm_loadu_si128(CONST_M128_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
366 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_shuffle_epi32(
367 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
368 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_shuffle_epi32(
369 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
370 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_shuffle_epi32(
371 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
372
373 __m128i temp = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
374 _mm_storeu_si128(M128_CAST(cv_l+0),
375 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
376 _mm_storeu_si128(M128_CAST(cv_l+4),
377 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
378 _mm_storeu_si128(M128_CAST(cv_r+4),
379 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
380 _mm_storeu_si128(M128_CAST(cv_r+0), temp);
381};
382
383/* -------------------------------------------------------- *
384* step function
385* -------------------------------------------------------- */
386
387template <unsigned int Alpha, unsigned int Beta>
388inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
389{
390 add_blk(cv_l, cv_r);
391 rotate_blk<Alpha>(cv_l);
392 xor_with_const(cv_l, const_v);
393 add_blk(cv_r, cv_l);
394 rotate_blk<Beta>(cv_r);
395 add_blk(cv_l, cv_r);
396 rotate_msg_gamma(cv_r);
397}
398
399/* -------------------------------------------------------- *
400* compression function
401* -------------------------------------------------------- */
402
403inline void compress(LSH256_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
404{
405 CRYPTOPP_ASSERT(ctx != NULLPTR);
406
407 LSH256_SSSE3_Internal s_state(ctx->cv_l);
408 LSH256_SSSE3_Internal* i_state = &s_state;
409
410 const lsh_u32* const_v = NULL;
411 lsh_u32* cv_l = ctx->cv_l;
412 lsh_u32* cv_r = ctx->cv_r;
413
414 load_msg_blk(i_state, pdMsgBlk);
415
416 msg_add_even(cv_l, cv_r, i_state);
417 load_sc(&const_v, 0);
418 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
419 word_perm(cv_l, cv_r);
420
421 msg_add_odd(cv_l, cv_r, i_state);
422 load_sc(&const_v, 8);
423 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
424 word_perm(cv_l, cv_r);
425
426 for (size_t i = 1; i < NUM_STEPS / 2; i++)
427 {
428 msg_exp_even(i_state);
429 msg_add_even(cv_l, cv_r, i_state);
430 load_sc(&const_v, 16 * i);
431 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
432 word_perm(cv_l, cv_r);
433
434 msg_exp_odd(i_state);
435 msg_add_odd(cv_l, cv_r, i_state);
436 load_sc(&const_v, 16 * i + 8);
437 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
438 word_perm(cv_l, cv_r);
439 }
440
441 msg_exp_even(i_state);
442 msg_add_even(cv_l, cv_r, i_state);
443}
444
445/* -------------------------------------------------------- */
446
447inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 iv[16])
448{
449 _mm_storeu_si128(M128_CAST(cv_l+ 0),
450 _mm_load_si128(CONST_M128_CAST(iv+ 0)));
451 _mm_storeu_si128(M128_CAST(cv_l+ 4),
452 _mm_load_si128(CONST_M128_CAST(iv+ 4)));
453 _mm_storeu_si128(M128_CAST(cv_r+ 0),
454 _mm_load_si128(CONST_M128_CAST(iv+ 8)));
455 _mm_storeu_si128(M128_CAST(cv_r+ 4),
456 _mm_load_si128(CONST_M128_CAST(iv+12)));
457}
458
459inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
460{
461 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
462 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
463 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
464 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
465}
466
467inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
468{
469 lsh_u32* sub_msgs = ctx->sub_msgs;
470
471 _mm_storeu_si128(M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
472 _mm_storeu_si128(M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
473 _mm_storeu_si128(M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
474 _mm_storeu_si128(M128_CAST(sub_msgs+12), _mm_setzero_si128());
475 _mm_storeu_si128(M128_CAST(sub_msgs+16), _mm_setzero_si128());
476 _mm_storeu_si128(M128_CAST(sub_msgs+20), _mm_setzero_si128());
477 _mm_storeu_si128(M128_CAST(sub_msgs+24), _mm_setzero_si128());
478 _mm_storeu_si128(M128_CAST(sub_msgs+28), _mm_setzero_si128());
479}
480
481inline void init224(LSH256_SSSE3_Context* ctx)
482{
483 CRYPTOPP_ASSERT(ctx != NULLPTR);
484
485 zero_submsgs(ctx);
486 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
487}
488
489inline void init256(LSH256_SSSE3_Context* ctx)
490{
491 CRYPTOPP_ASSERT(ctx != NULLPTR);
492
493 zero_submsgs(ctx);
494 load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
495}
496
497/* -------------------------------------------------------- */
498
499inline void fin(LSH256_SSSE3_Context* ctx)
500{
501 CRYPTOPP_ASSERT(ctx != NULLPTR);
502
503 _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
504 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
505 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
506 _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
507 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
508 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
509}
510
511/* -------------------------------------------------------- */
512
513inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
514{
515 CRYPTOPP_ASSERT(ctx != NULLPTR);
516 CRYPTOPP_ASSERT(ctx->alg_type != 0);
517 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
518
519 lsh_uint alg_type = ctx->alg_type;
520 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
521 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
522
523 // Multiplying by sizeof(lsh_u8) looks odd...
524 memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
525 if (hash_val_bit_len){
526 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
527 }
528}
529
530/* -------------------------------------------------------- */
531
532lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
533{
534 CRYPTOPP_ASSERT(ctx != NULLPTR);
535 CRYPTOPP_ASSERT(ctx->alg_type != 0);
536
537 lsh_u32 alg_type = ctx->alg_type;
538 const lsh_u32* const_v = NULL;
539 ctx->remain_databitlen = 0;
540
541 switch (alg_type)
542 {
543 case LSH_TYPE_256_256:
544 init256(ctx);
545 return LSH_SUCCESS;
546 case LSH_TYPE_256_224:
547 init224(ctx);
548 return LSH_SUCCESS;
549 default:
550 break;
551 }
552
553 lsh_u32* cv_l = ctx->cv_l;
554 lsh_u32* cv_r = ctx->cv_r;
555
556 zero_iv(cv_l, cv_r);
557 cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
558 cv_l[1] = LSH_GET_HASHBIT(alg_type);
559
560 for (size_t i = 0; i < NUM_STEPS / 2; i++)
561 {
562 //Mix
563 load_sc(&const_v, i * 16);
564 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
565 word_perm(cv_l, cv_r);
566
567 load_sc(&const_v, i * 16 + 8);
568 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
569 word_perm(cv_l, cv_r);
570 }
571
572 return LSH_SUCCESS;
573}
574
575lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
576{
577 CRYPTOPP_ASSERT(ctx != NULLPTR);
578 CRYPTOPP_ASSERT(data != NULLPTR);
579 CRYPTOPP_ASSERT(databitlen % 8 == 0);
580 CRYPTOPP_ASSERT(ctx->alg_type != 0);
581
582 if (databitlen == 0){
583 return LSH_SUCCESS;
584 }
585
586 // We are byte oriented. tail bits will always be 0.
587 size_t databytelen = databitlen >> 3;
588 // lsh_uint pos2 = databitlen & 0x7;
589 const size_t pos2 = 0;
590
591 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
592 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
593 const size_t remain_msg_bit = 0;
594
595 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
596 return LSH_ERR_INVALID_STATE;
597 }
598 if (remain_msg_bit > 0){
599 return LSH_ERR_INVALID_DATABITLEN;
600 }
601
602 if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
603 {
604 memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
605 ctx->remain_databitlen += (lsh_uint)databitlen;
606 remain_msg_byte += (lsh_uint)databytelen;
607 if (pos2){
608 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
609 }
610 return LSH_SUCCESS;
611 }
612
613 if (remain_msg_byte > 0){
614 size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
615 memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
616 compress(ctx, ctx->last_block);
617 data += more_byte;
618 databytelen -= more_byte;
619 remain_msg_byte = 0;
620 ctx->remain_databitlen = 0;
621 }
622
623 while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
624 {
625 // This call to compress caused some trouble.
626 // The data pointer can become unaligned in the
627 // previous block.
628 compress(ctx, data);
629 data += LSH256_MSG_BLK_BYTE_LEN;
630 databytelen -= LSH256_MSG_BLK_BYTE_LEN;
631 }
632
633 if (databytelen > 0){
634 memcpy(ctx->last_block, data, databytelen);
635 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
636 }
637
638 if (pos2){
639 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
640 ctx->remain_databitlen += pos2;
641 }
642
643 return LSH_SUCCESS;
644}
645
646lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
647{
648 CRYPTOPP_ASSERT(ctx != NULLPTR);
649 CRYPTOPP_ASSERT(hashval != NULLPTR);
650
651 // We are byte oriented. tail bits will always be 0.
652 size_t remain_msg_byte = ctx->remain_databitlen >> 3;
653 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
654 const size_t remain_msg_bit = 0;
655
656 if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
657 return LSH_ERR_INVALID_STATE;
658 }
659
660 if (remain_msg_bit){
661 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
662 }
663 else{
664 ctx->last_block[remain_msg_byte] = 0x80;
665 }
666 memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
667
668 compress(ctx, ctx->last_block);
669
670 fin(ctx);
671 get_hash(ctx, hashval);
672
673 return LSH_SUCCESS;
674}
675
676ANONYMOUS_NAMESPACE_END // Anonymous
677
678NAMESPACE_BEGIN(CryptoPP)
679
680extern
681void LSH256_Base_Restart_SSSE3(word32* state)
682{
683 state[RemainingBits] = 0;
684 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
685 lsh_err err = lsh256_ssse3_init(&ctx);
686
687 if (err != LSH_SUCCESS)
688 throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_init failed");
689}
690
691extern
692void LSH256_Base_Update_SSSE3(word32* state, const byte *input, size_t size)
693{
694 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
695 lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);
696
697 if (err != LSH_SUCCESS)
698 throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_update failed");
699}
700
701extern
702void LSH256_Base_TruncatedFinal_SSSE3(word32* state, byte *hash, size_t)
703{
704 LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
705 lsh_err err = lsh256_ssse3_final(&ctx, hash);
706
707 if (err != LSH_SUCCESS)
708 throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_final failed");
709}
710
711NAMESPACE_END
712
713#endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
Definition: misc.h:1548
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2208
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1599
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68