Crypto++ 8.7
Free C++ class library of cryptographic schemes
shacal2_simd.cpp
1// shacal2_simd.cpp - written and placed in the public domain by
2// Jeffrey Walton and Jack Lloyd
3//
4// Jack Lloyd and the Botan team allowed Crypto++ to use parts of
5// Botan's implementation under the same license as Crypto++
6// is released. The code for SHACAL2_Enc_ProcessAndXorBlock_SHANI
7// below is Botan's x86_encrypt_blocks with minor tweaks. Many thanks
8// to the Botan team. Also see http://github.com/randombit/botan/.
9//
10// This source file uses intrinsics to gain access to SHA-NI and
11// ARMv8a SHA instructions. A separate source file is needed because
12// additional CXXFLAGS are required to enable the appropriate instruction
13// sets in some build configurations.
14
15#include "pch.h"
16#include "config.h"
17#include "sha.h"
18#include "misc.h"
19
20#if (CRYPTOPP_SHANI_AVAILABLE)
21# include <nmmintrin.h>
22# include <immintrin.h>
23#endif
24
25// Squash MS LNK4221 and libtool warnings. The function in this file is
// compiled only when CRYPTOPP_SHANI_AVAILABLE is set; this definition gives
// the file a symbol of its own in every build configuration.
// NOTE(review): presumably the warnings are about an otherwise empty
// object file - confirm against the toolchain documentation.
26extern const char SHACAL2_SIMD_FNAME[] = __FILE__;
27
28NAMESPACE_BEGIN(CryptoPP)
29
30#if CRYPTOPP_SHANI_AVAILABLE
31void SHACAL2_Enc_ProcessAndXorBlock_SHANI(const word32* subKeys, const byte *inBlock, const byte *xorBlock, byte *outBlock)
32{
33 CRYPTOPP_ASSERT(subKeys);
34 CRYPTOPP_ASSERT(inBlock);
35 CRYPTOPP_ASSERT(outBlock);
36
37 const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
38 const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
39
40 __m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 0)), MASK1);
41 __m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 16)), MASK2);
42
43 __m128i TMP = _mm_alignr_epi8(B0, B1, 8);
44 B1 = _mm_blend_epi16(B1, B0, 0xF0);
45 B0 = TMP;
46
47#if 0
48 // SSE2 + SSSE3, but 0.2 cpb slower on a Celeraon J3455
49 const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
50 const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
51
52 __m128i B0 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 0));
53 __m128i B1 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 16));
54
55 __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
56 B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
57 B0 = TMP;
58#endif
59
60 const byte* keys = reinterpret_cast<const byte*>(subKeys);
61 for (size_t i = 0; i != 8; ++i)
62 {
63 const __m128i RK0 = _mm_load_si128(CONST_M128_CAST(keys + 32*i));
64 const __m128i RK2 = _mm_load_si128(CONST_M128_CAST(keys + 32*i+16));
65 const __m128i RK1 = _mm_srli_si128(RK0, 8);
66 const __m128i RK3 = _mm_srli_si128(RK2, 8);
67
68 B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
69 B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
70 B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
71 B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
72 }
73
74 TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
75 B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
76 B0 = TMP;
77
78 if (xorBlock)
79 {
80 _mm_storeu_si128(M128_CAST(outBlock + 0),
81 _mm_xor_si128(B0, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 0))));
82
83 _mm_storeu_si128(M128_CAST(outBlock + 16),
84 _mm_xor_si128(B1, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 16))));
85 }
86 else
87 {
88 _mm_storeu_si128(M128_CAST(outBlock + 0), B0);
89 _mm_storeu_si128(M128_CAST(outBlock + 16), B1);
90 }
91}
92#endif
93
94NAMESPACE_END
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Library configuration file.
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
Utility functions for the Crypto++ library.
Crypto++ library namespace.
Precompiled header file.
Classes for SHA-1 and SHA-2 family of message digests.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68