19#if (CRYPTOPP_SSSE3_AVAILABLE)
21# include <pmmintrin.h>
22# include <tmmintrin.h>
26# include <ammintrin.h>
28# include <x86intrin.h>
32#if (CRYPTOPP_ARM_NEON_HEADER)
37#if (CRYPTOPP_ARM_ACLE_HEADER)
60#undef CRYPTOPP_POWER8_AVAILABLE
61#if defined(CRYPTOPP_POWER8_AVAILABLE)
// Name of this source file, defined with external linkage.
// NOTE(review): presumably here so the object file always exports a symbol — confirm.
67extern const char LEA_SIMD_FNAME[] = __FILE__;
69ANONYMOUS_NAMESPACE_BEGIN
75#if (CRYPTOPP_ARM_NEON_AVAILABLE)
// Bitwise XOR of two 128-bit NEON vectors (veorq_u32).
77inline uint32x4_t Xor(
const uint32x4_t& a,
const uint32x4_t& b)
79 return veorq_u32(a, b);
// Adds the four 32-bit lanes of a and b independently (vaddq_u32).
82inline uint32x4_t Add(
const uint32x4_t& a,
const uint32x4_t& b)
84 return vaddq_u32(a, b);
// Subtracts the four 32-bit lanes of b from those of a independently (vsubq_u32).
87inline uint32x4_t Sub(
const uint32x4_t& a,
const uint32x4_t& b)
89 return vsubq_u32(a, b);
// Rotates every 32-bit lane of val left by the compile-time amount R.
// Built from a left shift by R and a right shift by 32-R, OR-ed together.
92template <
unsigned int R>
93inline uint32x4_t RotateLeft(
const uint32x4_t& val)
95 const uint32x4_t a(vshlq_n_u32(val, R));
96 const uint32x4_t b(vshrq_n_u32(val, 32 - R));
97 return vorrq_u32(a, b);
// Rotates every 32-bit lane of val right by the compile-time amount R.
// Built from a left shift by 32-R and a right shift by R, OR-ed together.
100template <
unsigned int R>
101inline uint32x4_t RotateRight(
const uint32x4_t& val)
103 const uint32x4_t a(vshlq_n_u32(val, 32 - R));
104 const uint32x4_t b(vshrq_n_u32(val, R));
105 return vorrq_u32(a, b);
108#if defined(__aarch32__) || defined(__aarch64__)
// RotateLeft specialised for R == 8: a whole-byte rotation, so it is done as a
// byte permutation with a table lookup (vqtbl1q_u8) instead of shifts and an OR.
// Two index tables are listed, the first under CRYPTOPP_BIG_ENDIAN.
// NOTE(review): the lines separating the two preprocessor branches are not
// visible in this view — the second table is presumably the little-endian one.
110inline uint32x4_t RotateLeft<8>(
const uint32x4_t& val)
112#if (CRYPTOPP_BIG_ENDIAN)
113 const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
114 const uint8x16_t mask = vld1q_u8(maskb);
116 const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
117 const uint8x16_t mask = vld1q_u8(maskb);
// Output byte i is input byte mask[i].
120 return vreinterpretq_u32_u8(
121 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
// RotateRight specialised for R == 8: a whole-byte rotation, so it is done as a
// byte permutation with a table lookup (vqtbl1q_u8) instead of shifts and an OR.
// Two index tables are listed, the first under CRYPTOPP_BIG_ENDIAN.
// NOTE(review): the lines separating the two preprocessor branches are not
// visible in this view — the second table is presumably the little-endian one.
125inline uint32x4_t RotateRight<8>(
const uint32x4_t& val)
127#if (CRYPTOPP_BIG_ENDIAN)
128 const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
129 const uint8x16_t mask = vld1q_u8(maskb);
// Each lane takes its bytes in the order 1,2,3,0 (offset by 4 per lane), so the
// last lane must read 13,14,15,12. Repeating index 14 there would drop byte 15
// and corrupt the top lane.
131 const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
132 const uint8x16_t mask = vld1q_u8(maskb);
// Output byte i is input byte mask[i].
135 return vreinterpretq_u32_u8(
136 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
// Interleaves the two low 32-bit lanes of a and b: result is {a0, b0, a1, b1}.
140uint32x4_t UnpackLow32(uint32x4_t a, uint32x4_t b)
142 uint32x2_t a1 = vget_low_u32(a);
143 uint32x2_t b1 = vget_low_u32(b);
// vzip_u32 yields val[0] = {a0, b0} and val[1] = {a1, b1}.
144 uint32x2x2_t result = vzip_u32(a1, b1);
145 return vcombine_u32(result.val[0], result.val[1]);
// Interleaves the two high 32-bit lanes of a and b: result is {a2, b2, a3, b3}.
148uint32x4_t UnpackHigh32(uint32x4_t a, uint32x4_t b)
150 uint32x2_t a1 = vget_high_u32(a);
151 uint32x2_t b1 = vget_high_u32(b);
// vzip_u32 yields val[0] = {a2, b2} and val[1] = {a3, b3}.
152 uint32x2x2_t result = vzip_u32(a1, b1);
153 return vcombine_u32(result.val[0], result.val[1]);
// Concatenates the low 64-bit halves of a and b: result is {a0, a1, b0, b1}
// when viewed as 32-bit lanes.
// The u32 <-> u64 views use the vreinterpretq intrinsics rather than C-style
// vector casts, which are a compiler extension.
156uint32x4_t UnpackLow64(uint32x4_t a, uint32x4_t b)
158 uint64x1_t a1 = vget_low_u64(vreinterpretq_u64_u32(a));
159 uint64x1_t b1 = vget_low_u64(vreinterpretq_u64_u32(b));
160 return vreinterpretq_u32_u64(vcombine_u64(a1, b1));
// Concatenates the high 64-bit halves of a and b: result is {a2, a3, b2, b3}
// when viewed as 32-bit lanes.
// The u32 <-> u64 views use the vreinterpretq intrinsics rather than C-style
// vector casts, which are a compiler extension.
163uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
165 uint64x1_t a1 = vget_high_u64(vreinterpretq_u64_u32(a));
166 uint64x1_t b1 = vget_high_u64(vreinterpretq_u64_u32(b));
167 return vreinterpretq_u32_u64(vcombine_u64(a1, b1));
// Broadcasts the 32-bit word rkey[IDX] into all four lanes of a NEON vector.
// IDX is a compile-time index into the caller's key array.
170template <
unsigned int IDX>
171inline uint32x4_t LoadKey(
const word32 rkey[])
173 return vdupq_n_u32(rkey[IDX]);
// UnpackNEON<IDX>(a, b, c, d): picks 32-bit lane IDX from each of the four
// inputs and returns {a[IDX], b[IDX], c[IDX], d[IDX]} — one row of a 4x4
// transpose of 32-bit words.
//
// Generic form: ignores its arguments and returns an all-zero vector.
// NOTE(review): looks like only the IDX 0..3 forms below are meant to be
// used — confirm.
176template <
unsigned int IDX>
177inline uint32x4_t UnpackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
182 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
183 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
184 return vmovq_n_u32(0);
// IDX 0: r1 = {a0,b0,a1,b1}, r2 = {c0,d0,c1,d1}; low halves give {a0,b0,c0,d0}.
188inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
190 const uint32x4_t r1 = UnpackLow32(a, b);
191 const uint32x4_t r2 = UnpackLow32(c, d);
192 return UnpackLow64(r1, r2);
// IDX 1: same r1/r2 as IDX 0; high halves give {a1,b1,c1,d1}.
196inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
198 const uint32x4_t r1 = UnpackLow32(a, b);
199 const uint32x4_t r2 = UnpackLow32(c, d);
200 return UnpackHigh64(r1, r2);
// IDX 2: r1 = {a2,b2,a3,b3}, r2 = {c2,d2,c3,d3}; low halves give {a2,b2,c2,d2}.
204inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
206 const uint32x4_t r1 = UnpackHigh32(a, b);
207 const uint32x4_t r2 = UnpackHigh32(c, d);
208 return UnpackLow64(r1, r2);
// IDX 3: same r1/r2 as IDX 2; high halves give {a3,b3,c3,d3}.
212inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
214 const uint32x4_t r1 = UnpackHigh32(a, b);
215 const uint32x4_t r2 = UnpackHigh32(c, d);
216 return UnpackHigh64(r1, r2);
// UnpackNEON<IDX>(v): copies 32-bit lane IDX of v into all four lanes.
//
// Generic form: the only visible statement returns an all-zero vector.
// NOTE(review): looks like only the IDX 0..3 forms below are meant to be
// used — confirm.
219template <
unsigned int IDX>
220inline uint32x4_t UnpackNEON(
const uint32x4_t& v)
226 return vmovq_n_u32(0);
// IDX 0: broadcast lane 0.
230inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& v)
233 return vdupq_n_u32(vgetq_lane_u32(v, 0));
// IDX 1: broadcast lane 1.
237inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& v)
240 return vdupq_n_u32(vgetq_lane_u32(v, 1));
// IDX 2: broadcast lane 2.
244inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& v)
247 return vdupq_n_u32(vgetq_lane_u32(v, 2));
// IDX 3: broadcast lane 3.
251inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& v)
254 return vdupq_n_u32(vgetq_lane_u32(v, 3));
// Four-vector repack: forwards unchanged to the four-argument UnpackNEON<IDX>,
// i.e. it gathers lane IDX of a, b, c and d into one vector.
257template <
unsigned int IDX>
258inline uint32x4_t RepackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
260 return UnpackNEON<IDX>(a, b, c, d);
// Single-vector repack: forwards unchanged to the one-argument UnpackNEON<IDX>,
// i.e. it broadcasts lane IDX of v.
263template <
unsigned int IDX>
264inline uint32x4_t RepackNEON(
const uint32x4_t& v)
266 return UnpackNEON<IDX>(v);
273#if (CRYPTOPP_SSSE3_AVAILABLE)
// Bitwise XOR of two 128-bit SSE integer vectors.
275inline __m128i Xor(
const __m128i& a,
const __m128i& b)
277 return _mm_xor_si128(a, b);
// Adds the four 32-bit lanes of a and b independently.
280inline __m128i Add(
const __m128i& a,
const __m128i& b)
282 return _mm_add_epi32(a, b);
// Subtracts the four 32-bit lanes of b from those of a independently.
285inline __m128i Sub(
const __m128i& a,
const __m128i& b)
287 return _mm_sub_epi32(a, b);
// Rotates every 32-bit lane of val left by the compile-time amount R.
// Two variants are visible: a single rotate intrinsic (_mm_roti_epi32) and a
// pair of shifts by R and 32-R.
// NOTE(review): the preprocessor condition choosing between them and the call
// that combines the two shifts are not visible in this view — presumably the
// shifts are OR-ed; confirm.
290template <
unsigned int R>
291inline __m128i RotateLeft(
const __m128i& val)
294 return _mm_roti_epi32(val, R);
297 _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
// Rotates every 32-bit lane of val right by the compile-time amount R.
// Two variants are visible: a rotate intrinsic called with 32-R (a left
// rotation by 32-R equals a right rotation by R) and a pair of shifts by
// 32-R and R.
// NOTE(review): the preprocessor condition choosing between them and the call
// that combines the two shifts are not visible in this view — presumably the
// shifts are OR-ed; confirm.
301template <
unsigned int R>
302inline __m128i RotateRight(
const __m128i& val)
305 return _mm_roti_epi32(val, 32-R);
308 _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
// RotateLeft specialised for R == 8. Two variants are visible: the rotate
// intrinsic with a count of 8, and a byte shuffle (_mm_shuffle_epi8), since a
// rotation by 8 bits is a byte permutation within each 32-bit lane.
// NOTE(review): the preprocessor lines selecting the variant are not visible
// in this view.
314inline __m128i RotateLeft<8>(
const __m128i& val)
317 return _mm_roti_epi32(val, 8);
// _mm_set_epi8 lists indices from byte 15 down to byte 0.
319 const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
320 return _mm_shuffle_epi8(val, mask);
// RotateRight specialised for R == 8. Two variants are visible: the rotate
// intrinsic with a count of 32-8, and a byte shuffle (_mm_shuffle_epi8), since
// a rotation by 8 bits is a byte permutation within each 32-bit lane.
// NOTE(review): the preprocessor lines selecting the variant are not visible
// in this view.
326inline __m128i RotateRight<8>(
const __m128i& val)
329 return _mm_roti_epi32(val, 32-8);
// _mm_set_epi8 lists indices from byte 15 down to byte 0.
331 const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
332 return _mm_shuffle_epi8(val, mask);
// Broadcasts the 32-bit word rkey[IDX] into all four lanes of an SSE vector.
// IDX is a compile-time index into the caller's key array.
336template <
unsigned int IDX>
337inline __m128i LoadKey(
const word32 rkey[])
// The key bits are copied into a float with memcpy (no pointer cast), splatted
// by the single-precision broadcast load, then reinterpreted as integers.
// _mm_castps_si128 changes only the type, not the bits.
339 float rk; std::memcpy(&rk, rkey+IDX,
sizeof(rk));
340 return _mm_castps_si128(_mm_load_ps1(&rk));
// UnpackXMM<IDX>(a, b, c, d): picks 32-bit lane IDX from each of the four
// inputs and returns {a[IDX], b[IDX], c[IDX], d[IDX]} — one row of a 4x4
// transpose of 32-bit words.
//
// Generic form: ignores its arguments and returns an all-zero vector.
// NOTE(review): looks like only the IDX 0..3 forms below are meant to be
// used — confirm.
343template <
unsigned int IDX>
344inline __m128i UnpackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
347 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
348 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
350 return _mm_setzero_si128();
// IDX 0: r1 = {a0,b0,a1,b1}, r2 = {c0,d0,c1,d1}; low halves give {a0,b0,c0,d0}.
354inline __m128i UnpackXMM<0>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
357 const __m128i r1 = _mm_unpacklo_epi32(a, b);
358 const __m128i r2 = _mm_unpacklo_epi32(c, d);
359 return _mm_unpacklo_epi64(r1, r2);
// IDX 1: same r1/r2 as IDX 0; high halves give {a1,b1,c1,d1}.
363inline __m128i UnpackXMM<1>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
366 const __m128i r1 = _mm_unpacklo_epi32(a, b);
367 const __m128i r2 = _mm_unpacklo_epi32(c, d);
368 return _mm_unpackhi_epi64(r1, r2);
// IDX 2: r1 = {a2,b2,a3,b3}, r2 = {c2,d2,c3,d3}; low halves give {a2,b2,c2,d2}.
372inline __m128i UnpackXMM<2>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
375 const __m128i r1 = _mm_unpackhi_epi32(a, b);
376 const __m128i r2 = _mm_unpackhi_epi32(c, d);
377 return _mm_unpacklo_epi64(r1, r2);
// IDX 3: same r1/r2 as IDX 2; high halves give {a3,b3,c3,d3}.
381inline __m128i UnpackXMM<3>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
384 const __m128i r1 = _mm_unpackhi_epi32(a, b);
385 const __m128i r2 = _mm_unpackhi_epi32(c, d);
386 return _mm_unpackhi_epi64(r1, r2);
// UnpackXMM<IDX>(v): copies 32-bit lane IDX of v into all four lanes using a
// byte shuffle whose index pattern repeats the four bytes of that lane.
//
// Generic form: the only visible statement returns an all-zero vector.
// NOTE(review): looks like only the IDX 0..3 forms below are meant to be
// used — confirm.
389template <
unsigned int IDX>
390inline __m128i UnpackXMM(
const __m128i& v)
394 return _mm_setzero_si128();
// IDX 0: bytes 0..3 repeated.
398inline __m128i UnpackXMM<0>(
const __m128i& v)
401 return _mm_shuffle_epi8(v, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
// IDX 1: bytes 4..7 repeated.
405inline __m128i UnpackXMM<1>(
const __m128i& v)
408 return _mm_shuffle_epi8(v, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
// IDX 2: bytes 8..11 repeated.
412inline __m128i UnpackXMM<2>(
const __m128i& v)
415 return _mm_shuffle_epi8(v, _mm_set_epi8(11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8));
// IDX 3: bytes 12..15 repeated.
419inline __m128i UnpackXMM<3>(
const __m128i& v)
422 return _mm_shuffle_epi8(v, _mm_set_epi8(15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12));
// Four-vector repack: forwards unchanged to the four-argument UnpackXMM<IDX>,
// i.e. it gathers lane IDX of a, b, c and d into one vector.
425template <
unsigned int IDX>
426inline __m128i RepackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
428 return UnpackXMM<IDX>(a, b, c, d);
// Single-vector repack: forwards unchanged to the one-argument UnpackXMM<IDX>,
// i.e. it broadcasts lane IDX of v.
431template <
unsigned int IDX>
432inline __m128i RepackXMM(
const __m128i& v)
434 return UnpackXMM<IDX>(v);
441#if (CRYPTOPP_POWER8_AVAILABLE)
462template <
unsigned int R>
466 return vec_rl(val, m);
469template <
unsigned int R>
472 const uint32x4_p m = {32-R, 32-R, 32-R, 32-R};
473 return vec_rl(val, m);
476template <
unsigned int IDX>
479 return vec_splats(rkey[IDX]);
482template <
unsigned int IDX>
486 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
487 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
524template <
unsigned int IDX>
536 const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0};
544 const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4};
552 const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8};
560 const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12};
564template <
unsigned int IDX>
567 return UnpackSIMD<IDX>(a, b, c, d);
570template <
unsigned int IDX>
573 return UnpackSIMD<IDX>(v);
580#if (CRYPTOPP_ARM_NEON_AVAILABLE || CRYPTOPP_SSSE3_AVAILABLE)
// Fully unrolled LEA encryption over four SIMD state words, updated in place.
//
//   temp    - the four state words; each statement overwrites one of them.
//   subkeys - round-key words; the visible statements read indices 0..191.
//   rounds  - not read by any statement visible in this view.
//
// Each group of three statements uses six consecutive subkeys and applies
// RotateRight<3>, RotateRight<5> and RotateLeft<9> to a sum of two key-masked
// words. The one word not written in a group keeps its value, and the word
// indices shift by one from group to group, so no data is moved between
// groups; the pattern repeats every four groups (24 subkeys).
//
// NOTE(review): the type W is declared on a line not visible here. The
// embedded line numbers also jump before the statements for subkeys 144..167
// and 168..191, which suggests guards on `rounds` that this view omits —
// confirm against the full file.
583inline void LEA_Encryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// Subkeys 0..23.
585 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<4>(subkeys)), Xor(temp[3], LoadKey<5>(subkeys))));
586 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<2>(subkeys)), Xor(temp[2], LoadKey<3>(subkeys))));
587 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<0>(subkeys)), Xor(temp[1], LoadKey<1>(subkeys))));
588 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<10>(subkeys)), Xor(temp[0], LoadKey<11>(subkeys))));
589 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<8>(subkeys)), Xor(temp[3], LoadKey<9>(subkeys))));
590 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<6>(subkeys)), Xor(temp[2], LoadKey<7>(subkeys))));
591 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<16>(subkeys)), Xor(temp[1], LoadKey<17>(subkeys))));
592 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<14>(subkeys)), Xor(temp[0], LoadKey<15>(subkeys))));
593 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<12>(subkeys)), Xor(temp[3], LoadKey<13>(subkeys))));
594 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<22>(subkeys)), Xor(temp[2], LoadKey<23>(subkeys))));
595 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<20>(subkeys)), Xor(temp[1], LoadKey<21>(subkeys))));
596 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<18>(subkeys)), Xor(temp[0], LoadKey<19>(subkeys))));
// Subkeys 24..47.
598 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<28>(subkeys)), Xor(temp[3], LoadKey<29>(subkeys))));
599 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<26>(subkeys)), Xor(temp[2], LoadKey<27>(subkeys))));
600 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<24>(subkeys)), Xor(temp[1], LoadKey<25>(subkeys))));
601 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<34>(subkeys)), Xor(temp[0], LoadKey<35>(subkeys))));
602 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<32>(subkeys)), Xor(temp[3], LoadKey<33>(subkeys))));
603 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<30>(subkeys)), Xor(temp[2], LoadKey<31>(subkeys))));
604 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<40>(subkeys)), Xor(temp[1], LoadKey<41>(subkeys))));
605 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<38>(subkeys)), Xor(temp[0], LoadKey<39>(subkeys))));
606 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<36>(subkeys)), Xor(temp[3], LoadKey<37>(subkeys))));
607 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<46>(subkeys)), Xor(temp[2], LoadKey<47>(subkeys))));
608 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<44>(subkeys)), Xor(temp[1], LoadKey<45>(subkeys))));
609 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<42>(subkeys)), Xor(temp[0], LoadKey<43>(subkeys))));
// Subkeys 48..71.
611 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<52>(subkeys)), Xor(temp[3], LoadKey<53>(subkeys))));
612 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<50>(subkeys)), Xor(temp[2], LoadKey<51>(subkeys))));
613 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<48>(subkeys)), Xor(temp[1], LoadKey<49>(subkeys))));
614 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<58>(subkeys)), Xor(temp[0], LoadKey<59>(subkeys))));
615 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<56>(subkeys)), Xor(temp[3], LoadKey<57>(subkeys))));
616 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<54>(subkeys)), Xor(temp[2], LoadKey<55>(subkeys))));
617 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<64>(subkeys)), Xor(temp[1], LoadKey<65>(subkeys))));
618 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<62>(subkeys)), Xor(temp[0], LoadKey<63>(subkeys))));
619 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<60>(subkeys)), Xor(temp[3], LoadKey<61>(subkeys))));
620 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<70>(subkeys)), Xor(temp[2], LoadKey<71>(subkeys))));
621 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<68>(subkeys)), Xor(temp[1], LoadKey<69>(subkeys))));
622 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<66>(subkeys)), Xor(temp[0], LoadKey<67>(subkeys))));
// Subkeys 72..95.
624 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<76>(subkeys)), Xor(temp[3], LoadKey<77>(subkeys))));
625 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<74>(subkeys)), Xor(temp[2], LoadKey<75>(subkeys))));
626 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<72>(subkeys)), Xor(temp[1], LoadKey<73>(subkeys))));
627 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<82>(subkeys)), Xor(temp[0], LoadKey<83>(subkeys))));
628 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<80>(subkeys)), Xor(temp[3], LoadKey<81>(subkeys))));
629 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<78>(subkeys)), Xor(temp[2], LoadKey<79>(subkeys))));
630 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<88>(subkeys)), Xor(temp[1], LoadKey<89>(subkeys))));
631 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<86>(subkeys)), Xor(temp[0], LoadKey<87>(subkeys))));
632 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<84>(subkeys)), Xor(temp[3], LoadKey<85>(subkeys))));
633 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<94>(subkeys)), Xor(temp[2], LoadKey<95>(subkeys))));
634 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<92>(subkeys)), Xor(temp[1], LoadKey<93>(subkeys))));
635 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<90>(subkeys)), Xor(temp[0], LoadKey<91>(subkeys))));
// Subkeys 96..119.
637 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<100>(subkeys)), Xor(temp[3], LoadKey<101>(subkeys))));
638 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<98>(subkeys)), Xor(temp[2], LoadKey<99>(subkeys))));
639 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<96>(subkeys)), Xor(temp[1], LoadKey<97>(subkeys))));
640 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<106>(subkeys)), Xor(temp[0], LoadKey<107>(subkeys))));
641 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<104>(subkeys)), Xor(temp[3], LoadKey<105>(subkeys))));
642 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<102>(subkeys)), Xor(temp[2], LoadKey<103>(subkeys))));
643 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<112>(subkeys)), Xor(temp[1], LoadKey<113>(subkeys))));
644 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<110>(subkeys)), Xor(temp[0], LoadKey<111>(subkeys))));
645 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<108>(subkeys)), Xor(temp[3], LoadKey<109>(subkeys))));
646 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<118>(subkeys)), Xor(temp[2], LoadKey<119>(subkeys))));
647 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<116>(subkeys)), Xor(temp[1], LoadKey<117>(subkeys))));
648 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<114>(subkeys)), Xor(temp[0], LoadKey<115>(subkeys))));
// Subkeys 120..143.
650 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<124>(subkeys)), Xor(temp[3], LoadKey<125>(subkeys))));
651 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<122>(subkeys)), Xor(temp[2], LoadKey<123>(subkeys))));
652 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<120>(subkeys)), Xor(temp[1], LoadKey<121>(subkeys))));
653 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<130>(subkeys)), Xor(temp[0], LoadKey<131>(subkeys))));
654 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<128>(subkeys)), Xor(temp[3], LoadKey<129>(subkeys))));
655 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<126>(subkeys)), Xor(temp[2], LoadKey<127>(subkeys))));
656 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<136>(subkeys)), Xor(temp[1], LoadKey<137>(subkeys))));
657 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<134>(subkeys)), Xor(temp[0], LoadKey<135>(subkeys))));
658 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<132>(subkeys)), Xor(temp[3], LoadKey<133>(subkeys))));
659 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<142>(subkeys)), Xor(temp[2], LoadKey<143>(subkeys))));
660 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<140>(subkeys)), Xor(temp[1], LoadKey<141>(subkeys))));
661 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<138>(subkeys)), Xor(temp[0], LoadKey<139>(subkeys))));
// Subkeys 144..167. NOTE(review): possibly conditional on `rounds`; the guard is not visible here.
665 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<148>(subkeys)), Xor(temp[3], LoadKey<149>(subkeys))));
666 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<146>(subkeys)), Xor(temp[2], LoadKey<147>(subkeys))));
667 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<144>(subkeys)), Xor(temp[1], LoadKey<145>(subkeys))));
668 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<154>(subkeys)), Xor(temp[0], LoadKey<155>(subkeys))));
669 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<152>(subkeys)), Xor(temp[3], LoadKey<153>(subkeys))));
670 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<150>(subkeys)), Xor(temp[2], LoadKey<151>(subkeys))));
671 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<160>(subkeys)), Xor(temp[1], LoadKey<161>(subkeys))));
672 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<158>(subkeys)), Xor(temp[0], LoadKey<159>(subkeys))));
673 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<156>(subkeys)), Xor(temp[3], LoadKey<157>(subkeys))));
674 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<166>(subkeys)), Xor(temp[2], LoadKey<167>(subkeys))));
675 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<164>(subkeys)), Xor(temp[1], LoadKey<165>(subkeys))));
676 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<162>(subkeys)), Xor(temp[0], LoadKey<163>(subkeys))));
// Subkeys 168..191. NOTE(review): possibly conditional on `rounds`; the guard is not visible here.
681 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<172>(subkeys)), Xor(temp[3], LoadKey<173>(subkeys))));
682 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<170>(subkeys)), Xor(temp[2], LoadKey<171>(subkeys))));
683 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<168>(subkeys)), Xor(temp[1], LoadKey<169>(subkeys))));
684 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<178>(subkeys)), Xor(temp[0], LoadKey<179>(subkeys))));
685 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<176>(subkeys)), Xor(temp[3], LoadKey<177>(subkeys))));
686 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<174>(subkeys)), Xor(temp[2], LoadKey<175>(subkeys))));
687 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<184>(subkeys)), Xor(temp[1], LoadKey<185>(subkeys))));
688 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<182>(subkeys)), Xor(temp[0], LoadKey<183>(subkeys))));
689 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<180>(subkeys)), Xor(temp[3], LoadKey<181>(subkeys))));
690 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<190>(subkeys)), Xor(temp[2], LoadKey<191>(subkeys))));
691 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<188>(subkeys)), Xor(temp[1], LoadKey<189>(subkeys))));
692 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<186>(subkeys)), Xor(temp[0], LoadKey<187>(subkeys))));
// Fully unrolled LEA decryption over four SIMD state words, updated in place.
//
//   temp    - the four state words; each statement overwrites one of them.
//   subkeys - round-key words; the visible statements read indices 191 down to 0.
//   rounds  - not read by any statement visible in this view.
//
// Each statement undoes one encryption step: it rotates the word back
// (RotateRight<9>, RotateLeft<5> or RotateLeft<3>), subtracts the neighbouring
// word XOR-ed with one subkey, then XORs with the paired subkey. Subkeys are
// consumed in groups of six, highest group first.
//
// NOTE(review): the type W is declared on a line not visible here. The
// embedded line numbers also jump before the statements for subkeys 168..191
// and 144..167, which suggests guards on `rounds` that this view omits —
// confirm against the full file.
699inline void LEA_Decryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// Subkeys 191..168. NOTE(review): possibly conditional on `rounds`; the guard is not visible here.
703 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<186>(subkeys))), LoadKey<187>(subkeys));
704 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<188>(subkeys))), LoadKey<189>(subkeys));
705 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<190>(subkeys))), LoadKey<191>(subkeys));
706 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<180>(subkeys))), LoadKey<181>(subkeys));
707 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<182>(subkeys))), LoadKey<183>(subkeys));
708 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<184>(subkeys))), LoadKey<185>(subkeys));
709 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<174>(subkeys))), LoadKey<175>(subkeys));
710 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<176>(subkeys))), LoadKey<177>(subkeys));
711 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<178>(subkeys))), LoadKey<179>(subkeys));
712 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<168>(subkeys))), LoadKey<169>(subkeys));
713 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<170>(subkeys))), LoadKey<171>(subkeys));
714 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<172>(subkeys))), LoadKey<173>(subkeys));
// Subkeys 167..144. NOTE(review): possibly conditional on `rounds`; the guard is not visible here.
719 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<162>(subkeys))), LoadKey<163>(subkeys));
720 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<164>(subkeys))), LoadKey<165>(subkeys));
721 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<166>(subkeys))), LoadKey<167>(subkeys));
722 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<156>(subkeys))), LoadKey<157>(subkeys));
723 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<158>(subkeys))), LoadKey<159>(subkeys));
724 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<160>(subkeys))), LoadKey<161>(subkeys));
725 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<150>(subkeys))), LoadKey<151>(subkeys));
726 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<152>(subkeys))), LoadKey<153>(subkeys));
727 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<154>(subkeys))), LoadKey<155>(subkeys));
728 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<144>(subkeys))), LoadKey<145>(subkeys));
729 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<146>(subkeys))), LoadKey<147>(subkeys));
730 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<148>(subkeys))), LoadKey<149>(subkeys));
// Subkeys 143..120.
733 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<138>(subkeys))), LoadKey<139>(subkeys));
734 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<140>(subkeys))), LoadKey<141>(subkeys));
735 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<142>(subkeys))), LoadKey<143>(subkeys));
736 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<132>(subkeys))), LoadKey<133>(subkeys));
737 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<134>(subkeys))), LoadKey<135>(subkeys));
738 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<136>(subkeys))), LoadKey<137>(subkeys));
739 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<126>(subkeys))), LoadKey<127>(subkeys));
740 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<128>(subkeys))), LoadKey<129>(subkeys));
741 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<130>(subkeys))), LoadKey<131>(subkeys));
742 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<120>(subkeys))), LoadKey<121>(subkeys));
743 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<122>(subkeys))), LoadKey<123>(subkeys));
744 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<124>(subkeys))), LoadKey<125>(subkeys));
// Subkeys 119..96.
746 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<114>(subkeys))), LoadKey<115>(subkeys));
747 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<116>(subkeys))), LoadKey<117>(subkeys));
748 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<118>(subkeys))), LoadKey<119>(subkeys));
749 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<108>(subkeys))), LoadKey<109>(subkeys));
750 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<110>(subkeys))), LoadKey<111>(subkeys));
751 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<112>(subkeys))), LoadKey<113>(subkeys));
752 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<102>(subkeys))), LoadKey<103>(subkeys));
753 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<104>(subkeys))), LoadKey<105>(subkeys));
754 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<106>(subkeys))), LoadKey<107>(subkeys));
755 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<96>(subkeys))), LoadKey<97>(subkeys));
756 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<98>(subkeys))), LoadKey<99>(subkeys));
757 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<100>(subkeys))), LoadKey<101>(subkeys));
// Subkeys 95..72.
759 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<90>(subkeys))), LoadKey<91>(subkeys));
760 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<92>(subkeys))), LoadKey<93>(subkeys));
761 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<94>(subkeys))), LoadKey<95>(subkeys));
762 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<84>(subkeys))), LoadKey<85>(subkeys));
763 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<86>(subkeys))), LoadKey<87>(subkeys));
764 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<88>(subkeys))), LoadKey<89>(subkeys));
765 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<78>(subkeys))), LoadKey<79>(subkeys));
766 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<80>(subkeys))), LoadKey<81>(subkeys));
767 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<82>(subkeys))), LoadKey<83>(subkeys));
768 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<72>(subkeys))), LoadKey<73>(subkeys));
769 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<74>(subkeys))), LoadKey<75>(subkeys));
770 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<76>(subkeys))), LoadKey<77>(subkeys));
// Subkeys 71..48.
772 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<66>(subkeys))), LoadKey<67>(subkeys));
773 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<68>(subkeys))), LoadKey<69>(subkeys));
774 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<70>(subkeys))), LoadKey<71>(subkeys));
775 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<60>(subkeys))), LoadKey<61>(subkeys));
776 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<62>(subkeys))), LoadKey<63>(subkeys));
777 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<64>(subkeys))), LoadKey<65>(subkeys));
778 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<54>(subkeys))), LoadKey<55>(subkeys));
779 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<56>(subkeys))), LoadKey<57>(subkeys));
780 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<58>(subkeys))), LoadKey<59>(subkeys));
781 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<48>(subkeys))), LoadKey<49>(subkeys));
782 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<50>(subkeys))), LoadKey<51>(subkeys));
783 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<52>(subkeys))), LoadKey<53>(subkeys));
// Subkeys 47..24.
785 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<42>(subkeys))), LoadKey<43>(subkeys));
786 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<44>(subkeys))), LoadKey<45>(subkeys));
787 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<46>(subkeys))), LoadKey<47>(subkeys));
788 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<36>(subkeys))), LoadKey<37>(subkeys));
789 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<38>(subkeys))), LoadKey<39>(subkeys));
790 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<40>(subkeys))), LoadKey<41>(subkeys));
791 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<30>(subkeys))), LoadKey<31>(subkeys));
792 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<32>(subkeys))), LoadKey<33>(subkeys));
793 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<34>(subkeys))), LoadKey<35>(subkeys));
794 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<24>(subkeys))), LoadKey<25>(subkeys));
795 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<26>(subkeys))), LoadKey<27>(subkeys));
796 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<28>(subkeys))), LoadKey<29>(subkeys));
// Subkeys 23..0.
798 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<18>(subkeys))), LoadKey<19>(subkeys));
799 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<20>(subkeys))), LoadKey<21>(subkeys));
800 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<22>(subkeys))), LoadKey<23>(subkeys));
801 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<12>(subkeys))), LoadKey<13>(subkeys));
802 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<14>(subkeys))), LoadKey<15>(subkeys));
803 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<16>(subkeys))), LoadKey<17>(subkeys));
804 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<6>(subkeys))), LoadKey<7>(subkeys));
805 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<8>(subkeys))), LoadKey<9>(subkeys));
806 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<10>(subkeys))), LoadKey<11>(subkeys));
807 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<0>(subkeys))), LoadKey<1>(subkeys));
808 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<2>(subkeys))), LoadKey<3>(subkeys));
809 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<4>(subkeys))), LoadKey<5>(subkeys));
816#if (CRYPTOPP_ARM_NEON_AVAILABLE)
// Encrypts the single block held in block0 (NEON), writing the result back
// into block0. subkeys and rounds are passed through to LEA_Encryption.
// NOTE(review): the declaration of `temp` is on a line not visible in this view.
818inline void LEA_Enc_Block(uint32x4_t &block0,
819 const word32 *subkeys,
unsigned int rounds)
// Broadcast each 32-bit word of the block across a whole vector.
822 temp[0] = UnpackNEON<0>(block0);
823 temp[1] = UnpackNEON<1>(block0);
824 temp[2] = UnpackNEON<2>(block0);
825 temp[3] = UnpackNEON<3>(block0);
827 LEA_Encryption(temp, subkeys, rounds);
// Collect lane 0 of the four state words back into one block.
829 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
// Decrypts the single block held in block0 (NEON), writing the result back
// into block0. subkeys and rounds are passed through to LEA_Decryption.
// NOTE(review): the declaration of `temp` is on a line not visible in this view.
832inline void LEA_Dec_Block(uint32x4_t &block0,
833 const word32 *subkeys,
unsigned int rounds)
// Broadcast each 32-bit word of the block across a whole vector.
836 temp[0] = UnpackNEON<0>(block0);
837 temp[1] = UnpackNEON<1>(block0);
838 temp[2] = UnpackNEON<2>(block0);
839 temp[3] = UnpackNEON<3>(block0);
841 LEA_Decryption(temp, subkeys, rounds);
// Collect lane 0 of the four state words back into one block.
843 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
// Encrypts four blocks at once (NEON), writing each result back into its
// own argument. subkeys and rounds are passed through to LEA_Encryption.
// NOTE(review): the declaration of `temp` is on a line not visible in this view.
846inline void LEA_Enc_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
847 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
// Transpose: temp[i] holds word i of each of the four blocks, one block per lane.
850 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
851 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
852 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
853 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
855 LEA_Encryption(temp, subkeys, rounds);
// Transpose back: blockN is lane N of the four state words.
857 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
858 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
859 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
860 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
// Decrypts four blocks at once (NEON), writing each result back into its
// own argument. subkeys and rounds are passed through to LEA_Decryption.
// NOTE(review): the declaration of `temp` is on a line not visible in this view.
863inline void LEA_Dec_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
864 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
// Transpose: temp[i] holds word i of each of the four blocks, one block per lane.
867 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
868 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
869 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
870 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
872 LEA_Decryption(temp, subkeys, rounds);
// Transpose back: blockN is lane N of the four state words.
874 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
875 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
876 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
877 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
884#if (CRYPTOPP_SSSE3_AVAILABLE)
// Encrypts the single block held in block0 (SSE), writing the result back
// into block0. subkeys and rounds are passed through to LEA_Encryption.
// NOTE(review): the declaration of `temp` is on a line not visible in this view.
886inline void LEA_Enc_Block(__m128i &block0,
887 const word32 *subkeys,
unsigned int rounds)
// Broadcast each 32-bit word of the block across a whole vector.
890 temp[0] = UnpackXMM<0>(block0);
891 temp[1] = UnpackXMM<1>(block0);
892 temp[2] = UnpackXMM<2>(block0);
893 temp[3] = UnpackXMM<3>(block0);
895 LEA_Encryption(temp, subkeys, rounds);
// Collect lane 0 of the four state words back into one block.
897 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
900inline void LEA_Dec_Block(__m128i &block0,
901 const word32 *subkeys,
unsigned int rounds)
904 temp[0] = UnpackXMM<0>(block0);
905 temp[1] = UnpackXMM<1>(block0);
906 temp[2] = UnpackXMM<2>(block0);
907 temp[3] = UnpackXMM<3>(block0);
909 LEA_Decryption(temp, subkeys, rounds);
911 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
914inline void LEA_Enc_4_Blocks(__m128i &block0, __m128i &block1,
915 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
918 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
919 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
920 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
921 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
923 LEA_Encryption(temp, subkeys, rounds);
925 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
926 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
927 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
928 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
931inline void LEA_Dec_4_Blocks(__m128i &block0, __m128i &block1,
932 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
935 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
936 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
937 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
938 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
940 LEA_Decryption(temp, subkeys, rounds);
942 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
943 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
944 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
945 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
952#if (CRYPTOPP_POWER8_AVAILABLE)
// POWER8 single-block encryption.
// NOTE(review): the line that opens this definition is not visible in this
// listing; presumably it declares LEA_Enc_Block with a by-reference vector
// parameter named block0 - confirm against the full file.
// NOTE(review): the top of the file #undef's CRYPTOPP_POWER8_AVAILABLE, so
// this section looks compiled out - confirm that is intentional.
955 const word32 *subkeys,
unsigned int rounds)
// Fill temp[0..3] from block0, one UnpackSIMD index per element.
958 temp[0] = UnpackSIMD<0>(block0);
959 temp[1] = UnpackSIMD<1>(block0);
960 temp[2] = UnpackSIMD<2>(block0);
961 temp[3] = UnpackSIMD<3>(block0);
// Run the encryption routine over the unpacked state.
963 LEA_Encryption(temp, subkeys, rounds);
// Write the repacked result back into block0.
965 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 single-block decryption.
// NOTE(review): the line that opens this definition is not visible in this
// listing; presumably it declares LEA_Dec_Block with a by-reference vector
// parameter named block0 - confirm against the full file.
969 const word32 *subkeys,
unsigned int rounds)
// Fill temp[0..3] from block0, one UnpackSIMD index per element.
972 temp[0] = UnpackSIMD<0>(block0);
973 temp[1] = UnpackSIMD<1>(block0);
974 temp[2] = UnpackSIMD<2>(block0);
975 temp[3] = UnpackSIMD<3>(block0);
// Run the decryption routine over the unpacked state.
977 LEA_Decryption(temp, subkeys, rounds);
// Write the repacked result back into block0.
979 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-block encryption body.
// NOTE(review): the whole function header is missing from this listing;
// presumably LEA_Enc_4_Blocks taking block0..block3, subkeys and rounds -
// confirm against the full file.
// Each temp[i] is produced by UnpackSIMD<i> from all four blocks.
986 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
987 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
988 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
989 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
// Run the encryption routine over the unpacked state.
991 LEA_Encryption(temp, subkeys, rounds);
// RepackSIMD<k> produces output block k from the four working vectors.
993 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
994 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
995 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
996 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-block decryption body.
// NOTE(review): the whole function header is missing from this listing;
// presumably LEA_Dec_4_Blocks taking block0..block3, subkeys and rounds -
// confirm against the full file.
// Each temp[i] is produced by UnpackSIMD<i> from all four blocks.
1003 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
1004 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
1005 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
1006 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
// Run the decryption routine over the unpacked state.
1008 LEA_Decryption(temp, subkeys, rounds);
// RepackSIMD<k> produces output block k from the four working vectors.
1010 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
1011 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
1012 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
1013 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
1018ANONYMOUS_NAMESPACE_END
1024#if defined(CRYPTOPP_SSSE3_AVAILABLE)
1025size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1026 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1029 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1032size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1033 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1036 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1040#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
1041size_t LEA_Enc_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1042 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1045 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1048size_t LEA_Dec_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1049 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1052 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1056#if defined(CRYPTOPP_POWER8_AVAILABLE)
1057size_t LEA_Enc_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1058 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1061 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1064size_t LEA_Dec_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1065 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1068 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
Template for AdvancedProcessBlocks and SIMD processing.
size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
Library configuration file.
unsigned int word32
32-bit unsigned datatype
Classes for the LEA block cipher.
Utility functions for the Crypto++ library.
Crypto++ library namespace.
Support functions for PowerPC and vector operations.
__vector unsigned int uint32x4_p
Vector of 32-bit elements.
T1 VecPermute(const T1 vec, const T2 mask)
Permutes a vector.
__vector unsigned char uint8x16_p
Vector of 8-bit elements.
T1 VecXor(const T1 vec1, const T2 vec2)
XOR two vectors.
__vector unsigned long long uint64x2_p
Vector of 64-bit elements.
T1 VecSub(const T1 vec1, const T2 vec2)
Subtract two vectors.
T1 VecAdd(const T1 vec1, const T2 vec2)
Add two vectors.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.