Crypto++ 8.7
Free C++ class library of cryptographic schemes
sosemanuk.cpp
1// sosemanuk.cpp - originally written and placed in the public domain by Wei Dai
2
3// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sosemanuk.cpp" to generate MASM code
4
5#include "pch.h"
6#include "config.h"
7
8#if CRYPTOPP_MSC_VERSION
9# pragma warning(disable: 4702 4731)
10#endif
11
12#ifndef CRYPTOPP_GENERATE_X64_MASM
13
14#include "sosemanuk.h"
15#include "serpentp.h"
16#include "secblock.h"
17#include "misc.h"
18#include "cpu.h"
19
20NAMESPACE_BEGIN(CryptoPP)
21
22std::string SosemanukPolicy::AlgorithmProvider() const
23{
24#ifndef CRYPTOPP_DISABLE_SOSEMANUK_ASM
25# if CRYPTOPP_SSE2_ASM_AVAILABLE
26 if (HasSSE2())
27 return "SSE2";
28# endif
29#endif
30 return "C++";
31}
32
33void SosemanukPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKey, size_t keylen)
34{
35 CRYPTOPP_UNUSED(params);
36 Serpent_KeySchedule(m_key, 24, userKey, keylen);
37}
38
39void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
40{
41 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(iv), CRYPTOPP_UNUSED(length);
42 CRYPTOPP_ASSERT(length==16);
43
44 word32 a, b, c, d, e;
45
47 Block::Get(iv)(a)(b)(c)(d);
48
49 const word32 *k = m_key;
50 unsigned int i=1;
51
52 do
53 {
54 beforeS0(KX); beforeS0(S0); afterS0(LT);
55 afterS0(KX); afterS0(S1); afterS1(LT);
56 if (i == 3) // after 18th round
57 {
58 m_state[4] = b;
59 m_state[5] = e;
60 m_state[10] = c;
61 m_state[11] = a;
62 }
63 afterS1(KX); afterS1(S2); afterS2(LT);
64 afterS2(KX); afterS2(S3); afterS3(LT);
65 if (i == 2) // after 12th round
66 {
67 m_state[6] = c;
68 m_state[7] = d;
69 m_state[8] = b;
70 m_state[9] = e;
71 }
72 afterS3(KX); afterS3(S4); afterS4(LT);
73 afterS4(KX); afterS4(S5); afterS5(LT);
74 afterS5(KX); afterS5(S6); afterS6(LT);
75 afterS6(KX); afterS6(S7); afterS7(LT);
76
77 if (i == 3)
78 break;
79
80 ++i;
81 c = b;
82 b = e;
83 e = d;
84 d = a;
85 a = e;
86 k += 32;
87 }
88 while (true);
89
90 afterS7(KX);
91
92 m_state[0] = a;
93 m_state[1] = b;
94 m_state[2] = e;
95 m_state[3] = d;
96
97#define XMUX(c, x, y) (x ^ (y & (0 - (c & 1))))
98 m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
99 m_state[10] = rotlConstant<7>(m_state[10] * 0x54655307);
100}
101
// Lookup tables shared by the C++ and assembly keystream code.
// Entries 0..255 are read by MUL_A and by the assembly at [si + index*4].
// Entries 256..511 are read by DIV_A (index 256 + byte(x)) and by the
// assembly at byte offset 1024. The symbol has C linkage.
102extern "C" {
103word32 s_sosemanukMulTables[512] = {
// First half, variant 1: selected on x86/x32/x64 when the assembly is not
// disabled. It pairs with the MUL_A definition that rotates x left by 8
// and then indexes with the low byte.
104#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
105 0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
106 0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
107 0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
108 0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
109 0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
110 0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
111 0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
112 0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
113 0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
114 0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
115 0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
116 0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
117 0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
118 0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
119 0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
120 0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
121 0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
122 0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
123 0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
124 0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
125 0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
126 0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
127 0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
128 0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
129 0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
130 0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
131 0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
132 0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
133 0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
134 0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
135 0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
136 0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
137 0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
138 0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
139 0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
140 0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
141 0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
142 0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
143 0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
144 0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
145 0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
146 0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
147 0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
148 0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
149 0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
150 0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
151 0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
152 0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
153 0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
154 0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
155 0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
156 0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
157 0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
158 0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
159 0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
160 0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
161 0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
162 0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
163 0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
164 0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
165 0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
166 0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
167 0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
168 0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
// First half, variant 2: used on every other configuration. It pairs with
// the MUL_A definition that shifts x left by 8 and indexes with x >> 24.
169#else
170 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
171 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
172 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
173 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
174 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
175 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
176 0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
177 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
178 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
179 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
180 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
181 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
182 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
183 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
184 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
185 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
186 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
187 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
188 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
189 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
190 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
191 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
192 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
193 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
194 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
195 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
196 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
197 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
198 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
199 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
200 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
201 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
202 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
203 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
204 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
205 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
206 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
207 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
208 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
209 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
210 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
211 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
212 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
213 0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
214 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
215 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
216 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
217 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
218 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
219 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
220 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
221 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
222 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
223 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
224 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
225 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
226 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
227 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
228 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
229 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
230 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
231 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
232 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
233 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
234#endif
// Second half (entries 256..511), common to all configurations
235 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
236 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
237 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
238 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
239 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
240 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
241 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
242 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
243 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
244 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
245 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
246 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
247 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
248 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
249 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
250 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
251 0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
252 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
253 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
254 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
255 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
256 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
257 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
258 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
259 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
260 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
261 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
262 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
263 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
264 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
265 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
266 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
267 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
268 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
269 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
270 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
271 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
272 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
273 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
274 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
275 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
276 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
277 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
278 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
279 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
280 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
281 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
282 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
283 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
284 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
285 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
286 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
287 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
288 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
289 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
290 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
291 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
292 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
293 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
294 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
295 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
296 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
297 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
298 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
299};
300}
301
302#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
303unsigned int SosemanukPolicy::GetAlignment() const
304{
305#if CRYPTOPP_SSE2_ASM_AVAILABLE
306#ifdef __INTEL_COMPILER
307 if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
308#else
309 if (HasSSE2())
310#endif
311 return 16;
312 else
313#endif
314 return GetAlignmentOf<word32>();
315}
316
317unsigned int SosemanukPolicy::GetOptimalBlockSize() const
318{
319#if CRYPTOPP_SSE2_ASM_AVAILABLE
320#ifdef __INTEL_COMPILER
321 if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
322#else
323 if (HasSSE2())
324#endif
325 return 4*BYTES_PER_ITERATION;
326 else
327#endif
328 return BYTES_PER_ITERATION;
329}
330#endif
331
// External assembly routine called by SosemanukPolicy::OperateKeystream()
// when CRYPTOPP_X64_MASM_AVAILABLE is defined. Its body is the MASM PROC
// emitted from this same file under CRYPTOPP_GENERATE_X64_MASM.
// Note the argument order: count, input, output, then the state words.
332#ifdef CRYPTOPP_X64_MASM_AVAILABLE
333extern "C" {
334void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state);
335}
336#endif
337
338void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
339{
340#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
341
342#ifdef CRYPTOPP_X64_MASM_AVAILABLE
343 Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data());
344 return;
345#endif
346
347#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
348#ifdef CRYPTOPP_GENERATE_X64_MASM
349 ALIGN 8
350 Sosemanuk_OperateKeystream PROC FRAME
351 rex_push_reg rsi
352 push_reg rdi
353 alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
354 save_xmm128 xmm6, 02f0h
355 save_xmm128 xmm7, 0300h
356 .endprolog
357 mov rdi, r8
358 mov rax, r9
359#else
360#ifdef __INTEL_COMPILER
361 if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
362#else
363 if (HasSSE2())
364#endif
365 {
366#ifdef __GNUC__
367 #if CRYPTOPP_BOOL_X64
369 #endif
370 __asm__ __volatile__
371 (
372 INTEL_NOPREFIX
373 AS_PUSH_IF86( bx)
374#else
375 word32 *state = m_state;
376 AS2( mov WORD_REG(ax), state)
377 AS2( mov WORD_REG(di), output)
378 AS2( mov WORD_REG(dx), input)
379 AS2( mov WORD_REG(cx), iterationCount)
380#endif
381#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
382
383#if defined(__GNUC__) && CRYPTOPP_BOOL_X64
384 #define SSE2_workspace %5
385#else
386 #define SSE2_workspace WORD_REG(sp)
387#endif
388
389#define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ]
390#define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ]
391#define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ]
392#define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ]
393#define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ]
394#define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ]
395#define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ]
396#define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ
397#define SSE2_uvStart SSE2_stateCopy + 12*4
398
399#if (CRYPTOPP_BOOL_X86) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
400 AS_PUSH_IF86( bp)
401 AS2( mov AS_REG_6, esp)
402 AS2( and esp, -16)
403 AS2( sub esp, 80*4*2+12*4+8*WORD_SZ) // 80 v's, 80 u's, 12 state, 8 locals
404 AS2( mov [esp], AS_REG_6)
405#endif
406 AS2( mov SSE2_output, WORD_REG(di))
407 AS2( mov SSE2_input, WORD_REG(dx))
408 AS2( mov SSE2_state, WORD_REG(ax))
409#ifndef _MSC_VER
410 AS2( mov SSE2_pMulTables, WORD_REG(si))
411#endif
412 AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
413 AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
414 AS2( mov SSE2_wordsLeft, WORD_REG(si))
415 AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
416 AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
417 AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
418 AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
419 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
420 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
421 AS2( psrlq xmm0, 32)
422 AS2( movd AS_REG_6d, xmm0) // s(9)
423 AS2( mov ecx, [WORD_REG(ax)+10*4])
424 AS2( mov edx, [WORD_REG(ax)+11*4])
425 AS2( pcmpeqb xmm7, xmm7) // all ones
426
427#define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
428#define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
429#define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
430
431#define R10 ecx
432#define R11 edx
433#define R20 edx
434#define R21 ecx
435// workaround bug in GAS 2.15
436#define R20r WORD_REG(dx)
437#define R21r WORD_REG(cx)
438
439#define SSE2_STEP(i, j) \
440 AS2( mov eax, [s(i+0)])\
441 AS2( mov [v(i)], eax)\
442 AS2( rol eax, 8)\
443 AS2( lea AS_REG_7, [AS_REG_6 + R2##j##r])\
444 AS2( xor AS_REG_7d, R1##j)\
445 AS2( mov [u(i)], AS_REG_7d)\
446 AS2( mov AS_REG_7d, 1)\
447 AS2( and AS_REG_7d, R2##j)\
448 AS1( neg AS_REG_7d)\
449 AS2( and AS_REG_7d, AS_REG_6d)\
450 AS2( xor AS_REG_6d, eax)\
451 AS2( movzx eax, al)\
452 AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
453 AS2( mov eax, [s(i+3)])\
454 AS2( xor AS_REG_7d, [s(i+2)])\
455 AS2( add R1##j, AS_REG_7d)\
456 AS2( movzx AS_REG_7d, al)\
457 AS2( shr eax, 8)\
458 AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
459 AS2( xor AS_REG_6d, eax)\
460 AS2( imul R2##j, AS_HEX(54655307))\
461 AS2( rol R2##j, 7)\
462 AS2( mov [s(i+0)], AS_REG_6d)\
463
464 ASL(2) // outer loop, each iteration of this processes 80 words
465 AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u
466 AS2( mov WORD_REG(ax), 80)
467 AS2( cmp WORD_REG(si), 80)
468 AS2( cmovg WORD_REG(si), WORD_REG(ax))
469 AS2( mov SSE2_wordsLeft2, WORD_REG(si))
470 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)]) // use to end first inner loop
471 AS2( mov SSE2_diEnd, WORD_REG(si))
472#ifdef _MSC_VER
473 AS2( lea WORD_REG(si), s_sosemanukMulTables)
474#else
475 AS2( mov WORD_REG(si), SSE2_pMulTables)
476#endif
477
478 ASL(0) // first inner loop, 20 words each, 4 iterations
479 SSE2_STEP(0, 0)
480 SSE2_STEP(1, 1)
481 SSE2_STEP(2, 0)
482 SSE2_STEP(3, 1)
483 SSE2_STEP(4, 0)
484 SSE2_STEP(5, 1)
485 SSE2_STEP(6, 0)
486 SSE2_STEP(7, 1)
487 SSE2_STEP(8, 0)
488 SSE2_STEP(9, 1)
489 SSE2_STEP(10, 0)
490 SSE2_STEP(11, 1)
491 SSE2_STEP(12, 0)
492 SSE2_STEP(13, 1)
493 SSE2_STEP(14, 0)
494 SSE2_STEP(15, 1)
495 SSE2_STEP(16, 0)
496 SSE2_STEP(17, 1)
497 SSE2_STEP(18, 0)
498 SSE2_STEP(19, 1)
499 // loop
500 AS2( add WORD_REG(di), 5*4)
501 AS2( cmp WORD_REG(di), SSE2_diEnd)
502 ASJ( jne, 0, b)
503
504 AS2( mov WORD_REG(ax), SSE2_input)
505 AS2( mov AS_REG_7, SSE2_output)
506 AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u
507 AS2( mov WORD_REG(si), SSE2_wordsLeft2)
508
509 ASL(1) // second inner loop, 16 words each, 5 iterations
510 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
511 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
512 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
513 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
514 // S2
515 AS2( movdqa xmm4, xmm0)
516 AS2( pand xmm0, xmm2)
517 AS2( pxor xmm0, xmm3)
518 AS2( pxor xmm2, xmm1)
519 AS2( pxor xmm2, xmm0)
520 AS2( por xmm3, xmm4)
521 AS2( pxor xmm3, xmm1)
522 AS2( pxor xmm4, xmm2)
523 AS2( movdqa xmm1, xmm3)
524 AS2( por xmm3, xmm4)
525 AS2( pxor xmm3, xmm0)
526 AS2( pand xmm0, xmm1)
527 AS2( pxor xmm4, xmm0)
528 AS2( pxor xmm1, xmm3)
529 AS2( pxor xmm1, xmm4)
530 AS2( pxor xmm4, xmm7)
531 // xor with v
532 AS2( pxor xmm2, [WORD_REG(di)+80*4])
533 AS2( pxor xmm3, [WORD_REG(di)+80*5])
534 AS2( pxor xmm1, [WORD_REG(di)+80*6])
535 AS2( pxor xmm4, [WORD_REG(di)+80*7])
536 // exit loop early if less than 16 words left to output
537 // this is necessary because block size is 20 words, and we output 16 words in each iteration of this loop
538 AS2( cmp WORD_REG(si), 16)
539 ASJ( jl, 4, f)
540 // unpack
541 AS2( movdqa xmm6, xmm2)
542 AS2( punpckldq xmm2, xmm3)
543 AS2( movdqa xmm5, xmm1)
544 AS2( punpckldq xmm1, xmm4)
545 AS2( movdqa xmm0, xmm2)
546 AS2( punpcklqdq xmm2, xmm1)
547 AS2( punpckhqdq xmm0, xmm1)
548 AS2( punpckhdq xmm6, xmm3)
549 AS2( punpckhdq xmm5, xmm4)
550 AS2( movdqa xmm3, xmm6)
551 AS2( punpcklqdq xmm6, xmm5)
552 AS2( punpckhqdq xmm3, xmm5)
553
554 // output keystream
555 AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
556
557 // loop
558 AS2( add WORD_REG(di), 4*4)
559 AS2( sub WORD_REG(si), 16)
560 ASJ( jnz, 1, b)
561
562 // outer loop
563 AS2( mov WORD_REG(si), SSE2_wordsLeft)
564 AS2( sub WORD_REG(si), 80)
565 ASJ( jz, 6, f)
566 AS2( mov SSE2_wordsLeft, WORD_REG(si))
567 AS2( mov SSE2_input, WORD_REG(ax))
568 AS2( mov SSE2_output, AS_REG_7)
569 ASJ( jmp, 2, b)
570
571 ASL(4) // final output of less than 16 words
572 AS2( test WORD_REG(ax), WORD_REG(ax))
573 ASJ( jz, 5, f)
574 AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4])
575 AS2( pxor xmm2, xmm0)
576 AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4])
577 AS2( pxor xmm3, xmm0)
578 AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4])
579 AS2( pxor xmm1, xmm0)
580 AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4])
581 AS2( pxor xmm4, xmm0)
582 AS2( add WORD_REG(ax), 16)
583 ASL(5)
584 AS2( movd dword ptr [AS_REG_7+0*4], xmm2)
585 AS2( movd dword ptr [AS_REG_7+1*4], xmm3)
586 AS2( movd dword ptr [AS_REG_7+2*4], xmm1)
587 AS2( movd dword ptr [AS_REG_7+3*4], xmm4)
588 AS2( sub WORD_REG(si), 4)
589 ASJ( jz, 6, f)
590 AS2( add AS_REG_7, 16)
591 AS2( psrldq xmm2, 4)
592 AS2( psrldq xmm3, 4)
593 AS2( psrldq xmm1, 4)
594 AS2( psrldq xmm4, 4)
595 ASJ( jmp, 4, b)
596
597 ASL(6) // save state
598 AS2( mov AS_REG_6, SSE2_state)
599 AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
600 AS2( movdqa [AS_REG_6+0*16], xmm0)
601 AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
602 AS2( movdqa [AS_REG_6+1*16], xmm0)
603 AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
604 AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
605 AS2( mov [AS_REG_6+10*4], ecx)
606 AS2( mov [AS_REG_6+11*4], edx)
607
608 AS_POP_IF86( sp)
609 AS_POP_IF86( bp)
610
611#ifdef __GNUC__
612 AS_POP_IF86( bx)
613 ATT_PREFIX
614 :
615 : "a" (m_state.data()), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input)
617 , "r" (workspace.data())
618 : "memory", "cc", "%r9", "%r10", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
619 #else
620 : "memory", "cc"
621 #endif
622 );
623#endif
624#ifdef CRYPTOPP_GENERATE_X64_MASM
625 movdqa xmm6, [rsp + 02f0h]
626 movdqa xmm7, [rsp + 0300h]
627 add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
628 pop rdi
629 pop rsi
630 ret
631 Sosemanuk_OperateKeystream ENDP
632#else
633 }
634 else
635#endif
636#endif
637#ifndef CRYPTOPP_GENERATE_X64_MASM
638 {
639#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
640#define MUL_A(x) (x = (rotlConstant<8>(x)), x ^ s_sosemanukMulTables[byte(x)])
641#else
642#define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
643#endif
644
645#define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
646
647#define r1(i) ((i%2) ? reg2 : reg1)
648#define r2(i) ((i%2) ? reg1 : reg2)
649
650#define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
651 u = (s##x9 + r2(x0)) ^ r1(x0);\
652 t = v = s##x0;\
653 s##x0 = MUL_A(t) ^ DIV_A(s##x3) ^ s##x9;\
654 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
655 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
656
657#define SOSEMANUK_OUTPUT(x) \
658 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
659 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
660 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
661 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
662
663#define OUTPUT4 \
664 S2(0, u0, u1, u2, u3, u4);\
665 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
666
667 word32 s0 = m_state[0];
668 word32 s1 = m_state[1];
669 word32 s2 = m_state[2];
670 word32 s3 = m_state[3];
671 word32 s4 = m_state[4];
672 word32 s5 = m_state[5];
673 word32 s6 = m_state[6];
674 word32 s7 = m_state[7];
675 word32 s8 = m_state[8];
676 word32 s9 = m_state[9];
677 word32 reg1 = m_state[10];
678 word32 reg2 = m_state[11];
679 word32 t, u0, u1, u2, u3, u4, v0, v1, v2, v3;
680
681 do
682 {
683 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
684 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
685 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
686 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
687 OUTPUT4
688 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
689 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
690 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
691 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
692 OUTPUT4
693 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
694 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
695 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
696 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
697 OUTPUT4
698 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
699 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
700 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
701 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
702 OUTPUT4
703 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
704 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
705 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
706 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
707 OUTPUT4
708 }
709 while (--iterationCount);
710
711 m_state[0] = s0;
712 m_state[1] = s1;
713 m_state[2] = s2;
714 m_state[3] = s3;
715 m_state[4] = s4;
716 m_state[5] = s5;
717 m_state[6] = s6;
718 m_state[7] = s7;
719 m_state[8] = s8;
720 m_state[9] = s9;
721 m_state[10] = reg1;
722 m_state[11] = reg2;
723 }
724}
725
726NAMESPACE_END
727
728#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
Fixed size stack-based SecBlock with 16-byte alignment.
Definition: secblock.h:1259
Interface for retrieving values given their names.
Definition: cryptlib.h:322
A::pointer data()
Provides a pointer to the first element in the memory block.
Definition: secblock.h:857
Library configuration file.
#define CRYPTOPP_BOOL_X64
64-bit x86 platform
Definition: config_cpu.h:48
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
word128 dword
Double word used for multiprecision integer arithmetic.
Definition: config_int.h:193
Functions for CPU features and intrinsics.
Utility functions for the Crypto++ library.
Crypto++ library namespace.
Precompiled header file.
Classes and functions for secure memory allocations.
Classes for Sosemanuk stream cipher.
KeystreamOperation
Keystream operation flags.
Definition: strciphr.h:88
static const int BYTES_PER_ITERATION
Number of bytes for an iteration.
Definition: strciphr.h:211
Access a block of memory.
Definition: misc.h:2844
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68