Crypto++ 8.7
Free C++ class library of cryptographic schemes
lsh512_sse.cpp
// lsh512_sse.cpp - written and placed in the public domain by Jeffrey Walton
2// Based on the specification and source code provided by
3// Korea Internet & Security Agency (KISA) website. Also
4// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "misc.h"
17
18// Squash MS LNK4221 and libtool warnings
19extern const char LSH512_SSE_FNAME[] = __FILE__;
20
21#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22
23#if defined(CRYPTOPP_SSSE3_AVAILABLE)
24# include <emmintrin.h>
25# include <tmmintrin.h>
26#endif
27
28#if defined(CRYPTOPP_XOP_AVAILABLE)
29# include <ammintrin.h>
30#endif
31
32// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
33#if (CRYPTOPP_GCC_VERSION >= 40500)
34# include <x86intrin.h>
35#endif
36
37ANONYMOUS_NAMESPACE_BEGIN
38
/* LSH-512 sizes, rotation amounts, algorithm identifiers and status codes */

// Sizes expressed in bytes.
const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;

// Sizes expressed in 64-bit words, and the number of steps per compression.
// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
const unsigned int NUM_STEPS = 28;

// Rotation amounts passed to mix<>() for even and odd steps.
const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

// Algorithm type identifiers. The low 16 bits hold the digest size in
// bytes (see LSH_GET_HASHBYTE); bits 16-19 hold the family tag tested by
// LSH_IS_LSH512.
const unsigned int LSH_TYPE_512_512 = 0x10040;
const unsigned int LSH_TYPE_512_384 = 0x10030;
const unsigned int LSH_TYPE_512_256 = 0x10020;
const unsigned int LSH_TYPE_512_224 = 0x1001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Status codes returned by the lsh512_*_ssse3 functions */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Positions of the bookkeeping words inside the word64 state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;
77
78NAMESPACE_END
79
80NAMESPACE_BEGIN(CryptoPP)
81NAMESPACE_BEGIN(LSH)
82
83// lsh512.cpp
84extern const word64 LSH512_IV224[CV_WORD_LEN];
85extern const word64 LSH512_IV256[CV_WORD_LEN];
86extern const word64 LSH512_IV384[CV_WORD_LEN];
87extern const word64 LSH512_IV512[CV_WORD_LEN];
88extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
89
90NAMESPACE_END // LSH
91NAMESPACE_END // Crypto++
92
93ANONYMOUS_NAMESPACE_BEGIN
94
95using CryptoPP::byte;
100
101using CryptoPP::GetBlock;
105
106using CryptoPP::LSH::LSH512_IV224;
107using CryptoPP::LSH::LSH512_IV256;
108using CryptoPP::LSH::LSH512_IV384;
109using CryptoPP::LSH::LSH512_IV512;
110using CryptoPP::LSH::LSH512_StepConstants;
111
112typedef byte lsh_u8;
113typedef word32 lsh_u32;
114typedef word64 lsh_u64;
115typedef word32 lsh_uint;
116typedef word32 lsh_err;
117typedef word32 lsh_type;
118
119struct LSH512_SSSE3_Context
120{
121 LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
122 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
123 last_block(reinterpret_cast<byte*>(state+48)),
124 remain_databitlen(remainingBitLength),
125 alg_type(static_cast<lsh_type>(algType)) {}
126
127 lsh_u64* cv_l; // start of our state block
128 lsh_u64* cv_r;
129 lsh_u64* sub_msgs;
130 lsh_u8* last_block;
131 lsh_u64& remain_databitlen;
132 lsh_type alg_type;
133};
134
135struct LSH512_SSSE3_Internal
136{
137 LSH512_SSSE3_Internal(word64* state) :
138 submsg_e_l(state+16), submsg_e_r(state+24),
139 submsg_o_l(state+32), submsg_o_r(state+40) { }
140
141 lsh_u64* submsg_e_l; /* even left sub-message */
142 lsh_u64* submsg_e_r; /* even right sub-message */
143 lsh_u64* submsg_o_l; /* odd left sub-message */
144 lsh_u64* submsg_o_r; /* odd right sub-message */
145};
146
147// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
148
149/* LSH AlgType Macro */
150
151inline bool LSH_IS_LSH512(lsh_uint val) {
152 return (val & 0xf0000) == 0x10000;
153}
154
155inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
156 return val >> 24;
157}
158
159inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
160 return val & 0xffff;
161}
162
163inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
164 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
165}
166
167inline lsh_u64 loadLE64(lsh_u64 v) {
169}
170
171lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
172 return rotlFixed(x, r);
173}
174
175// Original code relied upon unaligned lsh_u64 buffer
176inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
177{
178 lsh_u64* submsg_e_l = i_state->submsg_e_l;
179 lsh_u64* submsg_e_r = i_state->submsg_e_r;
180 lsh_u64* submsg_o_l = i_state->submsg_o_l;
181 lsh_u64* submsg_o_r = i_state->submsg_o_r;
182
183 _mm_storeu_si128(M128_CAST(submsg_e_l+0),
184 _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
185 _mm_storeu_si128(M128_CAST(submsg_e_l+2),
186 _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
187 _mm_storeu_si128(M128_CAST(submsg_e_l+4),
188 _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
189 _mm_storeu_si128(M128_CAST(submsg_e_l+6),
190 _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
191
192 _mm_storeu_si128(M128_CAST(submsg_e_r+0),
193 _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
194 _mm_storeu_si128(M128_CAST(submsg_e_r+2),
195 _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
196 _mm_storeu_si128(M128_CAST(submsg_e_r+4),
197 _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
198 _mm_storeu_si128(M128_CAST(submsg_e_r+6),
199 _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
200
201 _mm_storeu_si128(M128_CAST(submsg_o_l+0),
202 _mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
203 _mm_storeu_si128(M128_CAST(submsg_o_l+2),
204 _mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
205 _mm_storeu_si128(M128_CAST(submsg_o_l+4),
206 _mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
207 _mm_storeu_si128(M128_CAST(submsg_o_l+6),
208 _mm_loadu_si128(CONST_M128_CAST(msgblk+176)));
209
210 _mm_storeu_si128(M128_CAST(submsg_o_r+0),
211 _mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
212 _mm_storeu_si128(M128_CAST(submsg_o_r+2),
213 _mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
214 _mm_storeu_si128(M128_CAST(submsg_o_r+4),
215 _mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
216 _mm_storeu_si128(M128_CAST(submsg_o_r+6),
217 _mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
218}
219
220inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
221{
222 CRYPTOPP_ASSERT(i_state != NULLPTR);
223
224 lsh_u64* submsg_e_l = i_state->submsg_e_l;
225 lsh_u64* submsg_e_r = i_state->submsg_e_r;
226 lsh_u64* submsg_o_l = i_state->submsg_o_l;
227 lsh_u64* submsg_o_r = i_state->submsg_o_r;
228
229 __m128i temp;
230 _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
231 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));
232
233 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
234 _mm_storeu_si128(M128_CAST(submsg_e_l+0),
235 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
236 _mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
237 _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
238 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));
239
240 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
241 _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
242 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
243 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
244 _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
245 temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
246 _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
247 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));
248
249 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
250 _mm_storeu_si128(M128_CAST(submsg_e_r+0),
251 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
252 _mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
253 _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
254 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));
255
256 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
257 _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
258 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
259 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
260 _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
261 temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
262
263 _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
264 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
265 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
266 _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
267 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
268 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
269 _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
270 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
271 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
272 _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
273 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
274 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
275
276 _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
277 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
278 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
279 _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
280 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
281 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
282 _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
283 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
284 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
285 _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
286 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
287 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
288}
289
290inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
291{
292 CRYPTOPP_ASSERT(i_state != NULLPTR);
293
294 lsh_u64* submsg_e_l = i_state->submsg_e_l;
295 lsh_u64* submsg_e_r = i_state->submsg_e_r;
296 lsh_u64* submsg_o_l = i_state->submsg_o_l;
297 lsh_u64* submsg_o_r = i_state->submsg_o_r;
298
299 __m128i temp;
300 _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
301 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));
302
303 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
304 _mm_storeu_si128(M128_CAST(submsg_o_l+0),
305 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
306 _mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
307 _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
308 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));
309
310 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
311 _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
312 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
313 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
314 _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
315 temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
316 _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
317 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));
318
319 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
320 _mm_storeu_si128(M128_CAST(submsg_o_r+0),
321 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
322 _mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
323 _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
324 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));
325
326 temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
327 _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
328 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
329 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
330 _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
331 temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
332
333 _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
334 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
335 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
336 _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
337 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
338 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
339 _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
340 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
341 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
342 _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
343 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
344 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
345
346 _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
347 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
348 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
349 _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
350 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
351 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
352 _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
353 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
354 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
355 _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
356 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
357 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
358}
359
360inline void load_sc(const lsh_u64** p_const_v, size_t i)
361{
362 *p_const_v = &LSH512_StepConstants[i];
363}
364
365inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
366{
367 CRYPTOPP_ASSERT(i_state != NULLPTR);
368
369 lsh_u64* submsg_e_l = i_state->submsg_e_l;
370 lsh_u64* submsg_e_r = i_state->submsg_e_r;
371
372 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
373 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
374 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
375 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
376 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
377 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
378 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
379 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
380 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
381 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
382 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
383 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
384 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
385 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
386 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
387 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
388 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
389 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
390 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
391 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
392 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
393 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
394 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
395 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
396}
397
398inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
399{
400 CRYPTOPP_ASSERT(i_state != NULLPTR);
401
402 lsh_u64* submsg_o_l = i_state->submsg_o_l;
403 lsh_u64* submsg_o_r = i_state->submsg_o_r;
404
405 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
406 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
407 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
408 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
409 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
410 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
411 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
412 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
413 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
414 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
415 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
416 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
417 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
418 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
419 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
420 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
421 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
422 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
423 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
424 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
425 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
426 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
427 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
428 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
429}
430
431inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
432{
433 _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
434 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
435 _mm_loadu_si128(CONST_M128_CAST(cv_r))));
436 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
437 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
438 _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
439 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
440 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
441 _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
442 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
443 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
444 _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
445}
446
447template <unsigned int R>
448inline void rotate_blk(lsh_u64 cv[8])
449{
450#if defined(CRYPTOPP_XOP_AVAILABLE)
451 _mm_storeu_si128(M128_CAST(cv),
452 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
453 _mm_storeu_si128(M128_CAST(cv+2),
454 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
455 _mm_storeu_si128(M128_CAST(cv+4),
456 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
457 _mm_storeu_si128(M128_CAST(cv+6),
458 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));
459
460#else
461 _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
462 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
463 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
464 _mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
465 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
466 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
467 _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
468 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
469 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
470 _mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
471 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
472 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
473#endif
474}
475
476inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
477{
478 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
479 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
480 _mm_loadu_si128(CONST_M128_CAST(const_v))));
481 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
482 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
483 _mm_loadu_si128(CONST_M128_CAST(const_v+2))));
484 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
485 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
486 _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
487 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
488 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
489 _mm_loadu_si128(CONST_M128_CAST(const_v+6))));
490}
491
492inline void rotate_msg_gamma(lsh_u64 cv_r[8])
493{
494 // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
495 _mm_storeu_si128(M128_CAST(cv_r+0),
496 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
497 _mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
498 _mm_storeu_si128(M128_CAST(cv_r+2),
499 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
500 _mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));
501
502 _mm_storeu_si128(M128_CAST(cv_r+4),
503 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
504 _mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
505 _mm_storeu_si128(M128_CAST(cv_r+6),
506 _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
507 _mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
508}
509
510inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
511{
512 __m128i temp[2];
513 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
514 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
515 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
516 _mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
517 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
518 temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));
519
520 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
521 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
522 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
523 _mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
524 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
525 temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
526 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
527 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));
528
529 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
530 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
531 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
532 _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
533 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
534 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
535 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
536 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));
537
538 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
539 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
540 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
541 _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
542 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
543 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));
544
545 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
546 temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));
547
548 _mm_storeu_si128(M128_CAST(cv_l+0),
549 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
550 _mm_storeu_si128(M128_CAST(cv_l+2),
551 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
552 _mm_storeu_si128(M128_CAST(cv_l+4),
553 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
554 _mm_storeu_si128(M128_CAST(cv_l+6),
555 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
556 _mm_storeu_si128(M128_CAST(cv_r+4),
557 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
558 _mm_storeu_si128(M128_CAST(cv_r+6),
559 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)));
560
561 _mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
562 _mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
563};
564
565/* -------------------------------------------------------- *
566* step function
567* -------------------------------------------------------- */
568
569template <unsigned int Alpha, unsigned int Beta>
570inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
571{
572 add_blk(cv_l, cv_r);
573 rotate_blk<Alpha>(cv_l);
574 xor_with_const(cv_l, const_v);
575 add_blk(cv_r, cv_l);
576 rotate_blk<Beta>(cv_r);
577 add_blk(cv_l, cv_r);
578 rotate_msg_gamma(cv_r);
579}
580
581/* -------------------------------------------------------- *
582* compression function
583* -------------------------------------------------------- */
584
585inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
586{
587 CRYPTOPP_ASSERT(ctx != NULLPTR);
588
589 LSH512_SSSE3_Internal s_state(ctx->cv_l);
590 LSH512_SSSE3_Internal* i_state = &s_state;
591
592 const lsh_u64* const_v = NULL;
593 lsh_u64 *cv_l = ctx->cv_l;
594 lsh_u64 *cv_r = ctx->cv_r;
595
596 load_msg_blk(i_state, pdMsgBlk);
597
598 msg_add_even(cv_l, cv_r, i_state);
599 load_sc(&const_v, 0);
600 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
601 word_perm(cv_l, cv_r);
602
603 msg_add_odd(cv_l, cv_r, i_state);
604 load_sc(&const_v, 8);
605 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
606 word_perm(cv_l, cv_r);
607
608 for (size_t i = 1; i < NUM_STEPS / 2; i++)
609 {
610 msg_exp_even(i_state);
611 msg_add_even(cv_l, cv_r, i_state);
612 load_sc(&const_v, 16 * i);
613 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
614 word_perm(cv_l, cv_r);
615
616 msg_exp_odd(i_state);
617 msg_add_odd(cv_l, cv_r, i_state);
618 load_sc(&const_v, 16 * i + 8);
619 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
620 word_perm(cv_l, cv_r);
621 }
622
623 msg_exp_even(i_state);
624 msg_add_even(cv_l, cv_r, i_state);
625}
626
627/* -------------------------------------------------------- */
628
629inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
630{
631 // The IV's are 32-byte aligned so we can use aligned loads.
632 _mm_storeu_si128(M128_CAST(cv_l+0),
633 _mm_load_si128(CONST_M128_CAST(iv+0)));
634 _mm_storeu_si128(M128_CAST(cv_l+2),
635 _mm_load_si128(CONST_M128_CAST(iv+2)));
636 _mm_storeu_si128(M128_CAST(cv_l+4),
637 _mm_load_si128(CONST_M128_CAST(iv+4)));
638 _mm_storeu_si128(M128_CAST(cv_l+6),
639 _mm_load_si128(CONST_M128_CAST(iv+6)));
640 _mm_storeu_si128(M128_CAST(cv_r+0),
641 _mm_load_si128(CONST_M128_CAST(iv+8)));
642 _mm_storeu_si128(M128_CAST(cv_r+2),
643 _mm_load_si128(CONST_M128_CAST(iv+10)));
644 _mm_storeu_si128(M128_CAST(cv_r+4),
645 _mm_load_si128(CONST_M128_CAST(iv+12)));
646 _mm_storeu_si128(M128_CAST(cv_r+6),
647 _mm_load_si128(CONST_M128_CAST(iv+14)));
648}
649
650inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
651{
652 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
653 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
654 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
655 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
656 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
657 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
658 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
659 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
660}
661
662inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
663{
664 lsh_u64* sub_msgs = ctx->sub_msgs;
665
666 _mm_storeu_si128(M128_CAST(sub_msgs+ 0),
667 _mm_setzero_si128());
668 _mm_storeu_si128(M128_CAST(sub_msgs+ 2),
669 _mm_setzero_si128());
670 _mm_storeu_si128(M128_CAST(sub_msgs+ 4),
671 _mm_setzero_si128());
672 _mm_storeu_si128(M128_CAST(sub_msgs+ 6),
673 _mm_setzero_si128());
674 _mm_storeu_si128(M128_CAST(sub_msgs+ 8),
675 _mm_setzero_si128());
676 _mm_storeu_si128(M128_CAST(sub_msgs+10),
677 _mm_setzero_si128());
678 _mm_storeu_si128(M128_CAST(sub_msgs+12),
679 _mm_setzero_si128());
680 _mm_storeu_si128(M128_CAST(sub_msgs+14),
681 _mm_setzero_si128());
682}
683
684inline void init224(LSH512_SSSE3_Context* ctx)
685{
686 CRYPTOPP_ASSERT(ctx != NULLPTR);
687
688 zero_submsgs(ctx);
689 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
690}
691
692inline void init256(LSH512_SSSE3_Context* ctx)
693{
694 CRYPTOPP_ASSERT(ctx != NULLPTR);
695
696 zero_submsgs(ctx);
697 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
698}
699
700inline void init384(LSH512_SSSE3_Context* ctx)
701{
702 CRYPTOPP_ASSERT(ctx != NULLPTR);
703
704 zero_submsgs(ctx);
705 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
706}
707
708inline void init512(LSH512_SSSE3_Context* ctx)
709{
710 CRYPTOPP_ASSERT(ctx != NULLPTR);
711
712 zero_submsgs(ctx);
713 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
714}
715
716/* -------------------------------------------------------- */
717
718inline void fin(LSH512_SSSE3_Context* ctx)
719{
720 CRYPTOPP_ASSERT(ctx != NULLPTR);
721
722 _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
723 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
724 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
725 _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
726 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
727 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
728 _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
729 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
730 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
731 _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
732 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
733 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
734}
735
736/* -------------------------------------------------------- */
737
738inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
739{
740 CRYPTOPP_ASSERT(ctx != NULLPTR);
741 CRYPTOPP_ASSERT(ctx->alg_type != 0);
742 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
743
744 lsh_uint alg_type = ctx->alg_type;
745 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
746 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
747
748 // Multiplying by sizeof(lsh_u8) looks odd...
749 memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
750 if (hash_val_bit_len){
751 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
752 }
753}
754
755/* -------------------------------------------------------- */
756
757lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
758{
759 CRYPTOPP_ASSERT(ctx != NULLPTR);
760 CRYPTOPP_ASSERT(ctx->alg_type != 0);
761
762 lsh_u32 alg_type = ctx->alg_type;
763 const lsh_u64* const_v = NULL;
764 ctx->remain_databitlen = 0;
765
766 switch (alg_type){
767 case LSH_TYPE_512_512:
768 init512(ctx);
769 return LSH_SUCCESS;
770 case LSH_TYPE_512_384:
771 init384(ctx);
772 return LSH_SUCCESS;
773 case LSH_TYPE_512_256:
774 init256(ctx);
775 return LSH_SUCCESS;
776 case LSH_TYPE_512_224:
777 init224(ctx);
778 return LSH_SUCCESS;
779 default:
780 break;
781 }
782
783 lsh_u64* cv_l = ctx->cv_l;
784 lsh_u64* cv_r = ctx->cv_r;
785
786 zero_iv(cv_l, cv_r);
787 cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
788 cv_l[1] = LSH_GET_HASHBIT(alg_type);
789
790 for (size_t i = 0; i < NUM_STEPS / 2; i++)
791 {
792 //Mix
793 load_sc(&const_v, i * 16);
794 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
795 word_perm(cv_l, cv_r);
796
797 load_sc(&const_v, i * 16 + 8);
798 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
799 word_perm(cv_l, cv_r);
800 }
801
802 return LSH_SUCCESS;
803}
804
805lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
806{
807 CRYPTOPP_ASSERT(ctx != NULLPTR);
808 CRYPTOPP_ASSERT(data != NULLPTR);
809 CRYPTOPP_ASSERT(databitlen % 8 == 0);
810 CRYPTOPP_ASSERT(ctx->alg_type != 0);
811
812 if (databitlen == 0){
813 return LSH_SUCCESS;
814 }
815
816 // We are byte oriented. tail bits will always be 0.
817 size_t databytelen = databitlen >> 3;
818 // lsh_uint pos2 = databitlen & 0x7;
819 const size_t pos2 = 0;
820
821 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
822 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
823 const size_t remain_msg_bit = 0;
824
825 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
826 return LSH_ERR_INVALID_STATE;
827 }
828 if (remain_msg_bit > 0){
829 return LSH_ERR_INVALID_DATABITLEN;
830 }
831
832 if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
833 memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
834 ctx->remain_databitlen += (lsh_uint)databitlen;
835 remain_msg_byte += (lsh_uint)databytelen;
836 if (pos2){
837 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
838 }
839 return LSH_SUCCESS;
840 }
841
842 if (remain_msg_byte > 0){
843 size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
844 memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
845 compress(ctx, ctx->last_block);
846 data += more_byte;
847 databytelen -= more_byte;
848 remain_msg_byte = 0;
849 ctx->remain_databitlen = 0;
850 }
851
852 while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
853 {
854 // This call to compress caused some trouble.
855 // The data pointer can become unaligned in the
856 // previous block.
857 compress(ctx, data);
858 data += LSH512_MSG_BLK_BYTE_LEN;
859 databytelen -= LSH512_MSG_BLK_BYTE_LEN;
860 }
861
862 if (databytelen > 0){
863 memcpy(ctx->last_block, data, databytelen);
864 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
865 }
866
867 if (pos2){
868 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
869 ctx->remain_databitlen += pos2;
870 }
871 return LSH_SUCCESS;
872}
873
874lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
875{
876 CRYPTOPP_ASSERT(ctx != NULLPTR);
877 CRYPTOPP_ASSERT(hashval != NULLPTR);
878
879 // We are byte oriented. tail bits will always be 0.
880 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
881 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
882 const size_t remain_msg_bit = 0;
883
884 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
885 return LSH_ERR_INVALID_STATE;
886 }
887
888 if (remain_msg_bit){
889 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
890 }
891 else{
892 ctx->last_block[remain_msg_byte] = 0x80;
893 }
894 memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
895
896 compress(ctx, ctx->last_block);
897
898 fin(ctx);
899 get_hash(ctx, hashval);
900
901 return LSH_SUCCESS;
902}
903
904ANONYMOUS_NAMESPACE_END
905
906NAMESPACE_BEGIN(CryptoPP)
907
908extern
909void LSH512_Base_Restart_SSSE3(word64* state)
910{
911 state[RemainingBits] = 0;
912 LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
913 lsh_err err = lsh512_init_ssse3(&ctx);
914
915 if (err != LSH_SUCCESS)
916 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
917}
918
919extern
920void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
921{
922 LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
923 lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);
924
925 if (err != LSH_SUCCESS)
926 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
927}
928
929extern
930void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
931{
932 LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
933 lsh_err err = lsh512_final_ssse3(&ctx, hash);
934
935 if (err != LSH_SUCCESS)
936 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
937}
938
939NAMESPACE_END
940
941#endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
Definition: misc.h:1548
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2208
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1599
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68