Crypto++ 8.7
Free C++ class library of cryptographic schemes
gcm.cpp
1// gcm.cpp - originally written and placed in the public domain by Wei Dai.
2// ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3// multiply routines are less efficient because they shadow x86.
4// The precomputed key table integration makes it tricky to use the
5// more efficient ARMv8 implementation of the multiply and reduce.
6
7// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8
9#include "pch.h"
10#include "config.h"
11
12#ifndef CRYPTOPP_IMPORTS
13#ifndef CRYPTOPP_GENERATE_X64_MASM
14
15// Visual Studio .Net 2003 compiler crash
16#if defined(_MSC_VER) && (_MSC_VER < 1400)
17# pragma optimize("", off)
18#endif
19
20#include "gcm.h"
21#include "cpu.h"
22
23#if defined(CRYPTOPP_DISABLE_GCM_ASM)
24# undef CRYPTOPP_X86_ASM_AVAILABLE
25# undef CRYPTOPP_X32_ASM_AVAILABLE
26# undef CRYPTOPP_X64_ASM_AVAILABLE
27# undef CRYPTOPP_SSE2_ASM_AVAILABLE
28#endif
29
30NAMESPACE_BEGIN(CryptoPP)
31
32#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33// Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34// 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35#if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36// 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37# define USE_MOVD_REG32 1
38#elif defined(__GNUC__) || defined(_MSC_VER)
39// 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40# define USE_MOVD_REG32_OR_REG64 1
41#else
42// 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43# define USE_MOV_REG32_OR_REG64 1
44#endif
45#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46
// 256-entry reduction table shared by all GCM objects. It is read by the
// 2K-table paths of AuthenticateBlocks() and is filled in by
// SetKeyWithoutResync() the first time a key is set with 2K tables.
47word16 GCM_Base::s_reductionTable[256];
// Becomes true once s_reductionTable has been populated.
// NOTE(review): a volatile bool is not a synchronization primitive; two threads
// keying concurrently may both fill the table - confirm this is acceptable.
48volatile bool GCM_Base::s_reductionTableInitialized = false;
49
50void GCM_Base::GCTR::IncrementCounterBy256()
51{
52 IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
53}
54
55static inline void Xor16(byte *a, const byte *b, const byte *c)
56{
57 CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
58 CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
59 CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
60 ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
61 ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
62}
63
64#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
65// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
66// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
67extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
68#endif // SSE2
69
70#if CRYPTOPP_ARM_NEON_AVAILABLE
71extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
72#endif
73
74#if CRYPTOPP_POWER8_AVAILABLE
75extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
76#endif
77
78#if CRYPTOPP_CLMUL_AVAILABLE
79extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
80extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
81const unsigned int s_cltableSizeInBlocks = 8;
82extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
83#endif // CRYPTOPP_CLMUL_AVAILABLE
84
85#if CRYPTOPP_ARM_PMULL_AVAILABLE
86extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
87extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
88const unsigned int s_cltableSizeInBlocks = 8;
89extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
90#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
91
92#if CRYPTOPP_POWER8_VMULL_AVAILABLE
93extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
94extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
95const unsigned int s_cltableSizeInBlocks = 8;
96extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
97#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
98
// Keys the underlying block cipher and (re)builds the GHASH multiplication table.
//  userKey, keylength - key material, passed straight to BlockCipher::SetKey()
//  params - forwarded to SetKey(); Name::TableSize() is also read here to choose
//    between the 2K and 64K software tables (the value is ignored when a
//    carryless-multiply backend is selected)
// Throws InvalidArgument when the keyed cipher's block size is not REQUIRED_BLOCKSIZE.
99void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
100{
101 BlockCipher &blockCipher = AccessBlockCipher();
102 blockCipher.SetKey(userKey, keylength, params);
103
104 // GCM is only defined for 16-byte block ciphers at the moment.
105 // However, variable blocksize support means we have to defer
106 // blocksize checks to runtime after the key is set. Also see
107 // https://github.com/weidai11/cryptopp/issues/408.
108 const unsigned int blockSize = blockCipher.BlockSize();
109 CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
110 if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
111 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
112
113 int tableSize, i, j, k;
114
// Choose the table size. A carryless-multiply backend always uses
// s_cltableSizeInBlocks blocks; otherwise the size is either 2K or 64K bytes.
115#if CRYPTOPP_CLMUL_AVAILABLE
116 if (HasCLMUL())
117 {
118 // Avoid "parameter not used" error and suppress Coverity finding
119 (void)params.GetIntValue(Name::TableSize(), tableSize);
120 tableSize = s_cltableSizeInBlocks * blockSize;
121 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
122 }
123 else
124#elif CRYPTOPP_ARM_PMULL_AVAILABLE
125 if (HasPMULL())
126 {
127 // Avoid "parameter not used" error and suppress Coverity finding
128 (void)params.GetIntValue(Name::TableSize(), tableSize);
129 tableSize = s_cltableSizeInBlocks * blockSize;
130 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
131 }
132 else
133#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
134 if (HasPMULL())
135 {
136 // Avoid "parameter not used" error and suppress Coverity finding
137 (void)params.GetIntValue(Name::TableSize(), tableSize);
138 tableSize = s_cltableSizeInBlocks * blockSize;
139 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
140 }
141 else
142#endif
143 {
// A caller-supplied TableSize is snapped to one of the two supported sizes;
// without one the class-level tables option decides.
144 if (params.GetIntValue(Name::TableSize(), tableSize))
145 tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
146 else
147 tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;
148
149 //#if defined(_MSC_VER) && (_MSC_VER < 1400)
150 // VC 2003 workaround: compiler generates bad code for 64K tables
151 //tableSize = 2*1024;
152 //#endif
153 }
154
// The buffer holds three blocks of working space plus the table itself.
155 m_buffer.resize(3*blockSize + tableSize);
156 byte *mulTable = MulTable();
157 byte *hashKey = HashKey();
// The hash key is an all-zero block run through the freshly keyed cipher, in place.
158 memset(hashKey, 0, REQUIRED_BLOCKSIZE);
159 blockCipher.ProcessBlock(hashKey);
160
// Carryless-multiply backends build their own table from the hash key; nothing
// below this point runs for them.
161#if CRYPTOPP_CLMUL_AVAILABLE
162 if (HasCLMUL())
163 {
164 GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
165 return;
166 }
167#elif CRYPTOPP_ARM_PMULL_AVAILABLE
168 if (HasPMULL())
169 {
170 GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
171 return;
172 }
173#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
174 if (HasPMULL())
175 {
176 GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
177 return;
178 }
179#endif
180
// Software tables. V0:V1 starts as the hash key; each loop iteration below
// stores the current value (where needed) and then shifts the 128-bit pair
// right by one bit, XORing 0xe1 into the top byte when a 1 bit is shifted out.
181 word64 V0, V1;
183 Block::Get(hashKey)(V0)(V1);
184
185 if (tableSize == 64*1024)
186 {
// 64K layout: 16 sub-tables of 256 entries, 16 bytes per entry. This loop
// writes the power-of-two entries of each sub-table.
187 for (i=0; i<128; i++)
188 {
189 k = i%8;
190 Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
191
192 int x = (int)V1 & 1;
193 V1 = (V1>>1) | (V0<<63);
194 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
195 }
196
// Entry 0 of every sub-table is zero; each remaining entry j+k is the XOR of
// entries j and k. Only the XOR routine differs between the branches.
197 for (i=0; i<16; i++)
198 {
199 memset(mulTable+i*256*16, 0, 16);
200#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
201 if (HasSSE2())
202 for (j=2; j<=0x80; j*=2)
203 for (k=1; k<j; k++)
204 GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
205 else
206#elif CRYPTOPP_ARM_NEON_AVAILABLE
207 if (HasNEON())
208 for (j=2; j<=0x80; j*=2)
209 for (k=1; k<j; k++)
210 GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
211 else
212#elif CRYPTOPP_POWER8_AVAILABLE
213 if (HasPower8())
214 for (j=2; j<=0x80; j*=2)
215 for (k=1; k<j; k++)
216 GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
217 else
218#endif
219 for (j=2; j<=0x80; j*=2)
220 for (k=1; k<j; k++)
221 Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
222 }
223 }
224 else
225 {
// 2K tables also need the shared reduction table; build it if the flag says
// it has not been built yet.
226 if (!s_reductionTableInitialized)
227 {
228 s_reductionTable[0] = 0;
229 word16 x = 0x01c2;
230 s_reductionTable[1] = ByteReverse(x);
231 for (unsigned int ii=2; ii<=0x80; ii*=2)
232 {
233 x <<= 1;
234 s_reductionTable[ii] = ByteReverse(x);
235 for (unsigned int jj=1; jj<ii; jj++)
236 s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
237 }
238 s_reductionTableInitialized = true;
239 }
240
// 2K layout: two groups of four 256-byte sub-tables (offsets 0 and 1024), each
// sub-table holding 16 entries of 16 bytes. Only the first eight of every 32
// shift steps are stored.
241 for (i=0; i<128-24; i++)
242 {
243 k = i%32;
244 if (k < 4)
245 Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
246 else if (k < 8)
247 Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
248
249 int x = (int)V1 & 1;
250 V1 = (V1>>1) | (V0<<63);
251 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
252 }
253
// As above: zero entry 0, then fill every other entry as the XOR of two
// already-known entries, for both groups of sub-tables.
254 for (i=0; i<4; i++)
255 {
256 memset(mulTable+i*256, 0, 16);
257 memset(mulTable+1024+i*256, 0, 16);
258#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
259 if (HasSSE2())
260 for (j=2; j<=8; j*=2)
261 for (k=1; k<j; k++)
262 {
263 GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
264 GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
265 }
266 else
267#elif CRYPTOPP_ARM_NEON_AVAILABLE
268 if (HasNEON())
269 for (j=2; j<=8; j*=2)
270 for (k=1; k<j; k++)
271 {
272 GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
273 GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
274 }
275 else
276#elif CRYPTOPP_POWER8_AVAILABLE
277 if (HasPower8())
278 for (j=2; j<=8; j*=2)
279 for (k=1; k<j; k++)
280 {
281 GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
282 GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
283 }
284 else
285#endif
286 for (j=2; j<=8; j*=2)
287 for (k=1; k<j; k++)
288 {
289 Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
290 Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
291 }
292 }
293 }
294}
295
296inline void GCM_Base::ReverseHashBufferIfNeeded()
297{
298#if CRYPTOPP_CLMUL_AVAILABLE
299 if (HasCLMUL())
300 {
301 GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
302 }
303#elif CRYPTOPP_ARM_PMULL_AVAILABLE
304 if (HasPMULL())
305 {
306 GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
307 }
308#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
309 if (HasPMULL())
310 {
311 GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
312 }
313#endif
314}
315
316void GCM_Base::Resync(const byte *iv, size_t len)
317{
318 BlockCipher &cipher = AccessBlockCipher();
319 byte *hashBuffer = HashBuffer();
320
321 if (len == 12)
322 {
323 memcpy(hashBuffer, iv, len);
324 memset(hashBuffer+len, 0, 3);
325 hashBuffer[len+3] = 1;
326 }
327 else
328 {
329 size_t origLen = len;
330 memset(hashBuffer, 0, HASH_BLOCKSIZE);
331
332 if (len >= HASH_BLOCKSIZE)
333 {
334 len = GCM_Base::AuthenticateBlocks(iv, len);
335 iv += (origLen - len);
336 }
337
338 if (len > 0)
339 {
340 memcpy(m_buffer, iv, len);
341 memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
342 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
343 }
344
345 PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
346 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
347
348 ReverseHashBufferIfNeeded();
349 }
350
351 if (m_state >= State_IVSet)
352 m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
353 else
354 m_ctr.SetCipherWithIV(cipher, hashBuffer);
355
356 m_ctr.Seek(HASH_BLOCKSIZE);
357
358 memset(hashBuffer, 0, HASH_BLOCKSIZE);
359}
360
361unsigned int GCM_Base::OptimalDataAlignment() const
362{
363 return
364#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
365 HasSSE2() ? 16 :
366#elif CRYPTOPP_ARM_NEON_AVAILABLE
367 HasNEON() ? 4 :
368#elif CRYPTOPP_POWER8_AVAILABLE
369 HasPower8() ? 16 :
370#endif
371 GetBlockCipher().OptimalDataAlignment();
372}
373
374#if CRYPTOPP_MSC_VERSION
375# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
376#endif
377
378#endif // Not CRYPTOPP_GENERATE_X64_MASM
379
380#ifdef CRYPTOPP_X64_MASM_AVAILABLE
381extern "C" {
382void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
383void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
384}
385#endif
386
387#ifndef CRYPTOPP_GENERATE_X64_MASM
388
// Folds whole HASH_BLOCKSIZE-byte blocks of data into the running hash kept in
// HashBuffer().
//  data - input bytes
//  len  - number of input bytes available
// Returns the number of trailing bytes that were not consumed.
// The table-driven C++ paths below are do/while loops, so they always process
// at least one block: callers are expected to pass len >= HASH_BLOCKSIZE.
// A carryless-multiply backend is used when it is compiled in and the CPU
// reports it. Otherwise the switch picks one of four table-driven paths.
389size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
390{
391#if CRYPTOPP_CLMUL_AVAILABLE
392 if (HasCLMUL())
393 {
394 return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
395 }
396#elif CRYPTOPP_ARM_PMULL_AVAILABLE
397 if (HasPMULL())
398 {
399 return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
400 }
401#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
402 if (HasPMULL())
403 {
404 return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
405 }
406#endif
407
409 word64 *hashBuffer = (word64 *)(void *)HashBuffer();
410 CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));
411
// Selector value: 2 is added when the buffer is large enough to hold 64K
// tables, and 1 is added when the SSE2 assembly is compiled in and the CPU has SSE2.
412 switch (2*(m_buffer.size()>=64*1024)
413#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
414 + HasSSE2()
415//#elif CRYPTOPP_ARM_NEON_AVAILABLE
416// + HasNEON()
417#endif
418 )
419 {
420 case 0: // non-SSE2 and 2K tables
421 {
422 byte *mulTable = MulTable();
423 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
424
425 do
426 {
427 word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
// XOR the next input block into the running hash value x0:x1.
428 Block::Get(data)(y0)(y1);
429 x0 ^= y0;
430 x1 ^= y1;
431
432 data += HASH_BLOCKSIZE;
433 len -= HASH_BLOCKSIZE;
434
435 #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
436
437 #if (CRYPTOPP_LITTLE_ENDIAN)
438 #if CRYPTOPP_BOOL_SLOW_WORD64
439 word32 z0 = (word32)x0;
440 word32 z1 = (word32)(x0>>32);
441 word32 z2 = (word32)x1;
442 word32 z3 = (word32)(x1>>32);
443 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
444 #else
445 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
446 #endif
447 #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
448 #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
449 #else
450 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
451 #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
452 #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
453 #endif
454
455 #define GF_MUL_32BY128(op, a, b, c) \
456 a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
457 a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
458 b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
459 b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
460 c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
461 c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
462 d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
463 d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
464
// Accumulate four partial products (a, b, c, d), one macro call per 32-bit
// quarter of x0:x1.
465 GF_MUL_32BY128(=, 0, 0, 0)
466 GF_MUL_32BY128(^=, 0, 1, 1)
467 GF_MUL_32BY128(^=, 1, 0, 2)
468 GF_MUL_32BY128(^=, 1, 1, 3)
469
// Fold d into c, c into b and b into a, shifting by one byte each time; the
// byte shifted out indexes s_reductionTable and is collected in r.
470 word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
471 GF_SHIFT_8(d)
472 c0 ^= d0; c1 ^= d1;
473 r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
474 GF_SHIFT_8(c)
475 b0 ^= c0; b1 ^= c1;
476 r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
477 GF_SHIFT_8(b)
478 a0 ^= b0; a1 ^= b1;
480 x0 = a0; x1 = a1;
481 }
482 while (len >= HASH_BLOCKSIZE);
483
484 hashBuffer[0] = x0; hashBuffer[1] = x1;
485 return len;
486 }
487
488 case 2: // non-SSE2 and 64K tables
489 {
490 byte *mulTable = MulTable();
491 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
492
493 do
494 {
495 word64 y0, y1, a0, a1;
496 Block::Get(data)(y0)(y1);
497 x0 ^= y0;
498 x1 ^= y1;
499
500 data += HASH_BLOCKSIZE;
501 len -= HASH_BLOCKSIZE;
502
503 #undef READ_TABLE_WORD64_COMMON
504 #undef READ_TABLE_WORD64
505
506 #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
507
508 #if (CRYPTOPP_LITTLE_ENDIAN)
509 #if CRYPTOPP_BOOL_SLOW_WORD64
510 word32 z0 = (word32)x0;
511 word32 z1 = (word32)(x0>>32);
512 word32 z2 = (word32)x1;
513 word32 z3 = (word32)(x1>>32);
514 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
515 #else
516 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
517 #endif
518 #else
519 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
520 #endif
521
522 #define GF_MUL_8BY128(op, b, c, d) \
523 a0 op READ_TABLE_WORD64(b, c, d, 0);\
524 a1 op READ_TABLE_WORD64(b, c, d, 1);\
525
// One table lookup per byte of x0:x1 (16 in total), XORed together; this path
// does not touch s_reductionTable.
526 GF_MUL_8BY128(=, 0, 0, 0)
527 GF_MUL_8BY128(^=, 0, 0, 1)
528 GF_MUL_8BY128(^=, 0, 0, 2)
529 GF_MUL_8BY128(^=, 0, 0, 3)
530 GF_MUL_8BY128(^=, 0, 1, 0)
531 GF_MUL_8BY128(^=, 0, 1, 1)
532 GF_MUL_8BY128(^=, 0, 1, 2)
533 GF_MUL_8BY128(^=, 0, 1, 3)
534 GF_MUL_8BY128(^=, 1, 2, 0)
535 GF_MUL_8BY128(^=, 1, 2, 1)
536 GF_MUL_8BY128(^=, 1, 2, 2)
537 GF_MUL_8BY128(^=, 1, 2, 3)
538 GF_MUL_8BY128(^=, 1, 3, 0)
539 GF_MUL_8BY128(^=, 1, 3, 1)
540 GF_MUL_8BY128(^=, 1, 3, 2)
541 GF_MUL_8BY128(^=, 1, 3, 3)
542
543 x0 = a0; x1 = a1;
544 }
545 while (len >= HASH_BLOCKSIZE);
546
547 hashBuffer[0] = x0; hashBuffer[1] = x1;
548 return len;
549 }
550#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
551
552#ifdef CRYPTOPP_X64_MASM_AVAILABLE
553 case 1: // SSE2 and 2K tables
554 GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
555 return len % 16;
556 case 3: // SSE2 and 64K tables
557 GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
558 return len % 16;
559#endif
560
561#if CRYPTOPP_SSE2_ASM_AVAILABLE
562
563 case 1: // SSE2 and 2K tables
564 {
565 #ifdef __GNUC__
566 __asm__ __volatile__
567 (
568 INTEL_NOPREFIX
569 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
570 ALIGN 8
571 GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
572 rex_push_reg rsi
573 push_reg rdi
574 push_reg rbx
575 .endprolog
576 mov rsi, r8
577 mov r11, r9
578 #else
579 AS2( mov WORD_REG(cx), data )
580 AS2( mov WORD_REG(dx), len )
581 AS2( mov WORD_REG(si), hashBuffer )
582 AS2( shr WORD_REG(dx), 4 )
583 #endif
584
586 AS1(push rbx)
587 AS1(push rbp)
588 #else
589 AS_PUSH_IF86( bx)
590 AS_PUSH_IF86( bp)
591 #endif
592
593 #ifdef __GNUC__
594 AS2( mov AS_REG_7, WORD_REG(di))
596 AS2( lea AS_REG_7, s_reductionTable)
597 #endif
598
599 AS2( movdqa xmm0, [WORD_REG(si)] )
600
601 #define MUL_TABLE_0 WORD_REG(si) + 32
602 #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
603 #define RED_TABLE AS_REG_7
604
605 ASL(0)
606 AS2( movdqu xmm4, [WORD_REG(cx)] )
607 AS2( pxor xmm0, xmm4 )
608
609 AS2( movd ebx, xmm0 )
610 AS2( mov eax, AS_HEX(f0f0f0f0) )
611 AS2( and eax, ebx )
612 AS2( shl ebx, 4 )
613 AS2( and ebx, AS_HEX(f0f0f0f0) )
614 AS2( movzx edi, ah )
615 AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
616 AS2( movzx edi, al )
617 AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
618 AS2( shr eax, 16 )
619 AS2( movzx edi, ah )
620 AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
621 AS2( movzx edi, al )
622 AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
623
624 #define SSE2_MUL_32BITS(i) \
625 AS2( psrldq xmm0, 4 )\
626 AS2( movd eax, xmm0 )\
627 AS2( and eax, AS_HEX(f0f0f0f0) )\
628 AS2( movzx edi, bh )\
629 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
630 AS2( movzx edi, bl )\
631 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
632 AS2( shr ebx, 16 )\
633 AS2( movzx edi, bh )\
634 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
635 AS2( movzx edi, bl )\
636 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
637 AS2( movd ebx, xmm0 )\
638 AS2( shl ebx, 4 )\
639 AS2( and ebx, AS_HEX(f0f0f0f0) )\
640 AS2( movzx edi, ah )\
641 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
642 AS2( movzx edi, al )\
643 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
644 AS2( shr eax, 16 )\
645 AS2( movzx edi, ah )\
646 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
647 AS2( movzx edi, al )\
648 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
649
650 SSE2_MUL_32BITS(1)
651 SSE2_MUL_32BITS(2)
652 SSE2_MUL_32BITS(3)
653
654 AS2( movzx edi, bh )
655 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
656 AS2( movzx edi, bl )
657 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
658 AS2( shr ebx, 16 )
659 AS2( movzx edi, bh )
660 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
661 AS2( movzx edi, bl )
662 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
663
664 AS2( movdqa xmm0, xmm3 )
665 AS2( pslldq xmm3, 1 )
666 AS2( pxor xmm2, xmm3 )
667 AS2( movdqa xmm1, xmm2 )
668 AS2( pslldq xmm2, 1 )
669 AS2( pxor xmm5, xmm2 )
670
671 AS2( psrldq xmm0, 15 )
672#if USE_MOVD_REG32
673 AS2( movd edi, xmm0 )
674#elif USE_MOV_REG32_OR_REG64
675 AS2( mov WORD_REG(di), xmm0 )
676#else // GNU Assembler
677 AS2( movd WORD_REG(di), xmm0 )
678#endif
679 AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
680 AS2( shl eax, 8 )
681
682 AS2( movdqa xmm0, xmm5 )
683 AS2( pslldq xmm5, 1 )
684 AS2( pxor xmm4, xmm5 )
685
686 AS2( psrldq xmm1, 15 )
687#if USE_MOVD_REG32
688 AS2( movd edi, xmm1 )
689#elif USE_MOV_REG32_OR_REG64
690 AS2( mov WORD_REG(di), xmm1 )
691#else
692 AS2( movd WORD_REG(di), xmm1 )
693#endif
694 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
695 AS2( shl eax, 8 )
696
697 AS2( psrldq xmm0, 15 )
698#if USE_MOVD_REG32
699 AS2( movd edi, xmm0 )
700#elif USE_MOV_REG32_OR_REG64
701 AS2( mov WORD_REG(di), xmm0 )
702#else
703 AS2( movd WORD_REG(di), xmm0 )
704#endif
705 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
706
707 AS2( movd xmm0, eax )
708 AS2( pxor xmm0, xmm4 )
709
710 AS2( add WORD_REG(cx), 16 )
711 AS2( sub WORD_REG(dx), 1 )
712 // ATT_NOPREFIX
713 ASJ( jnz, 0, b )
714 INTEL_NOPREFIX
715 AS2( movdqa [WORD_REG(si)], xmm0 )
716
718 AS1(pop rbp)
719 AS1(pop rbx)
720 #else
721 AS_POP_IF86( bp)
722 AS_POP_IF86( bx)
723 #endif
724
725 #ifdef __GNUC__
726 ATT_PREFIX
727 :
728 : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
729 : "memory", "cc", "%eax", "%ebx"
731 , PERCENT_REG(AS_REG_7), "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5"
732#endif
733 );
734 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
735 pop rbx
736 pop rdi
737 pop rsi
738 ret
739 GCM_AuthenticateBlocks_2K_SSE2 ENDP
740 #endif
741
742 return len%16;
743 }
744 case 3: // SSE2 and 64K tables
745 {
746 #ifdef __GNUC__
747 __asm__ __volatile__
748 (
749 INTEL_NOPREFIX
750 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
751 ALIGN 8
752 GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
753 rex_push_reg rsi
754 push_reg rdi
755 .endprolog
756 mov rsi, r8
757 #else
758 AS2( mov WORD_REG(cx), data )
759 AS2( mov WORD_REG(dx), len )
760 AS2( mov WORD_REG(si), hashBuffer )
761 AS2( shr WORD_REG(dx), 4 )
762 #endif
763
764 AS2( movdqa xmm0, [WORD_REG(si)] )
765
766 #undef MUL_TABLE
767 #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
768
769 ASL(1)
770 AS2( movdqu xmm1, [WORD_REG(cx)] )
771 AS2( pxor xmm1, xmm0 )
772 AS2( pxor xmm0, xmm0 )
773
774 #undef SSE2_MUL_32BITS
775 #define SSE2_MUL_32BITS(i) \
776 AS2( movd eax, xmm1 )\
777 AS2( psrldq xmm1, 4 )\
778 AS2( movzx edi, al )\
779 AS2( add WORD_REG(di), WORD_REG(di) )\
780 AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
781 AS2( movzx edi, ah )\
782 AS2( add WORD_REG(di), WORD_REG(di) )\
783 AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
784 AS2( shr eax, 16 )\
785 AS2( movzx edi, al )\
786 AS2( add WORD_REG(di), WORD_REG(di) )\
787 AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
788 AS2( movzx edi, ah )\
789 AS2( add WORD_REG(di), WORD_REG(di) )\
790 AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
791
792 SSE2_MUL_32BITS(0)
793 SSE2_MUL_32BITS(1)
794 SSE2_MUL_32BITS(2)
795 SSE2_MUL_32BITS(3)
796
797 AS2( add WORD_REG(cx), 16 )
798 AS2( sub WORD_REG(dx), 1 )
799 // ATT_NOPREFIX
800 ASJ( jnz, 1, b )
801 INTEL_NOPREFIX
802 AS2( movdqa [WORD_REG(si)], xmm0 )
803
804 #ifdef __GNUC__
805 ATT_PREFIX
806 :
807 : "c" (data), "d" (len/16), "S" (hashBuffer)
808 : "memory", "cc", "%edi", "%eax"
810 , "%xmm0", "%xmm1"
811#endif
812 );
813 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
814 pop rdi
815 pop rsi
816 ret
817 GCM_AuthenticateBlocks_64K_SSE2 ENDP
818 #endif
819
820 return len%16;
821 }
822#endif
823#ifndef CRYPTOPP_GENERATE_X64_MASM
824 }
825
826 return len%16;
827}
828
829void GCM_Base::AuthenticateLastHeaderBlock()
830{
831 if (m_bufferedDataLength > 0)
832 {
833 memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
834 m_bufferedDataLength = 0;
835 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
836 }
837}
838
839void GCM_Base::AuthenticateLastConfidentialBlock()
840{
841 GCM_Base::AuthenticateLastHeaderBlock();
842 PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
843 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
844}
845
846void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
847{
848 m_ctr.Seek(0);
849 ReverseHashBufferIfNeeded();
850 m_ctr.ProcessData(mac, HashBuffer(), macSize);
851}
852
853NAMESPACE_END
854
855#endif // Not CRYPTOPP_GENERATE_X64_MASM
856#endif
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1283
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:879
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: gcm.h:36
An invalid argument was detected.
Definition: cryptlib.h:203
Interface for retrieving values given their names.
Definition: cryptlib.h:322
CRYPTOPP_DLL bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:415
Access a block of memory.
Definition: misc.h:2807
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or resets the key of this object.
Library configuration file.
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
Definition: config_cpu.h:52
#define CRYPTOPP_BOOL_X32
32-bit x32 platform
Definition: config_cpu.h:44
#define CRYPTOPP_BOOL_X64
64-bit x86 platform
Definition: config_cpu.h:48
#define W64LIT(x)
Declare an unsigned word64.
Definition: config_int.h:119
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned short word16
16-bit unsigned datatype
Definition: config_int.h:59
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
GCM block cipher mode of operation.
@ GCM_64K_Tables
Use 64K bytes of precomputed multiplication tables.
Definition: gcm.h:27
byte ByteReverse(byte value)
Reverses bytes in an 8-bit value.
Definition: misc.h:2022
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition: misc.h:1299
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1227
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2208
Crypto++ library namespace.
const char * TableSize()
int, in bytes
Definition: argnames.h:81
const char * BlockSize()
int, in bytes
Definition: argnames.h:27
Precompiled header file.
Access a block of memory.
Definition: misc.h:2844
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68