1 | /* |
2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, |
Michaël Peeters and Gilles Van Assche. For more information, feedback or
4 | questions, please refer to our website: http://keccak.noekeon.org/ |
5 | |
6 | Implementation by the designers, |
7 | hereby denoted as "the implementer". |
8 | |
9 | To the extent possible under law, the implementer has waived all copyright |
10 | and related or neighboring rights to the source code in this file. |
11 | http://creativecommons.org/publicdomain/zero/1.0/ |
12 | */ |
13 | |
14 | #include <string.h> |
15 | #include "brg_endian.h" |
16 | #include "KeccakF-1600-opt64-settings.h" |
17 | #include "KeccakF-1600-interface.h" |
18 | |
/* Byte type and 64-bit lane type used by the rest of this file. */
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
21 | |
/*
 * ALIGN is a declaration prefix requesting 32-byte alignment. It is only
 * given a real definition when one of the SIMD variants (UseSSE or UseXOP)
 * is selected and the compiler is GCC-compatible or MSVC.
 */
#if defined(UseSSE) || defined(UseXOP)
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#endif
#endif

/* In every other configuration ALIGN expands to nothing. */
#ifndef ALIGN
# define ALIGN
#endif
33 | |
34 | #if defined(UseSSE) |
35 | #include <x86intrin.h> |
/*
 * UseSSE configuration: both the "64-bit" and the "128-bit" vector types are
 * __m128i. V6464 overlays one 128-bit vector with two UINT64 values.
 */
typedef __m128i V64;
typedef __m128i V128;
typedef union {
    V128 v128;
    UINT64 v64[2];
} V6464;

/*
 * Single-lane helpers. LOAD64/CONST64/STORE64 use the "low 64 bits" load and
 * store intrinsics on the address of their argument. ROL64 ORs a left shift
 * by o with a right shift by 64-o, i.e. a left rotation of each 64-bit
 * element. XOReq64 assigns to its first argument.
 */
#define ANDnu64(a, b) _mm_andnot_si128(a, b)
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)

/*
 * Two-lane helpers. CONST128/LOAD128/STORE128 use the aligned load/store
 * intrinsics; LOAD128u uses the unaligned load. ROL64in128 has the same
 * expansion as ROL64 above. GET64LOLO/GET64HIHI and COPY64HI2LO/COPY64LO2HI
 * recombine or duplicate the 64-bit halves of their operands.
 */
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
#define ZERO128() _mm_setzero_si128()

/*
 * Select the round macros: the 64-bit-only SIMD variant, or the 128-bit
 * variant together with the rho8_56 constant.
 * NOTE(review): rho8_56 is presumably a byte-shuffle mask for
 * SHUFFLEBYTES128 (rotations by 8 and 56) -- confirm in
 * KeccakF-1600-simd128.macros.
 */
#ifdef UseOnlySIMD64
#include "KeccakF-1600-simd64.macros"
#else
ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
#include "KeccakF-1600-simd128.macros"
#endif

/* The lane-complementing option is rejected in this configuration. */
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseSSE"
#endif
77 | #elif defined(UseXOP) |
78 | #include <x86intrin.h> |
/* UseXOP configuration: both vector types are __m128i. */
typedef __m128i V64;
typedef __m128i V128;

/*
 * Single-lane helpers: low-64-bit load/store on the address of the
 * argument, and XOR. XOReq64 assigns to its first argument.
 */
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)

/*
 * Two-lane helpers. CONST128/LOAD128/STORE128 use the aligned load/store
 * intrinsics; LOAD128u uses the unaligned load.
 */
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define ZERO128() _mm_setzero_si128()

/*
 * Helpers that swap, select or duplicate the 64-bit halves of vectors.
 * GET64HILO is built from GET64LOHI with swapped operands followed by SWAP64.
 */
#define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
#define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)

/*
 * Rotations. ROL6464same passes one count o for the whole vector;
 * ROL6464(a, r1, r2) pastes r1 and r2 into the name of one of the
 * rot_<r1>_<r2> tables below and loads that pair as the count operand.
 * Only the (r1, r2) pairs that have a table here can be used.
 */
#define ROL6464same(a, o) _mm_roti_epi64(a, o)
#define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
ALIGN const UINT64 rot_0_20[2] = { 0, 20};
ALIGN const UINT64 rot_44_3[2] = {44, 3};
ALIGN const UINT64 rot_43_45[2] = {43, 45};
ALIGN const UINT64 rot_21_61[2] = {21, 61};
ALIGN const UINT64 rot_14_28[2] = {14, 28};
ALIGN const UINT64 rot_1_36[2] = { 1, 36};
ALIGN const UINT64 rot_6_10[2] = { 6, 10};
ALIGN const UINT64 rot_25_15[2] = {25, 15};
ALIGN const UINT64 rot_8_56[2] = { 8, 56};
ALIGN const UINT64 rot_18_27[2] = {18, 27};
ALIGN const UINT64 rot_62_55[2] = {62, 55};
ALIGN const UINT64 rot_39_41[2] = {39, 41};
120 | |
#if defined(UseSimulatedXOP)
// For debugging purposes, when XOP is not available:
// the XOP rotate intrinsics are replaced by SSE2 shift/OR sequences.
#undef ROL6464
#undef ROL6464same
#define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
/*
 * Rotates the low 64-bit half of a left by r0 and the high half left by r1.
 *
 * Both halves are rotated by r0 (a0) and by r1; the high half of the
 * r1-rotated vector is copied down to the low half (a1), and the low halves
 * of a0 and a1 are then combined as {low: a0, high: a1}.
 *
 * The rotation goes through ROL6464same because no ROL64 macro is defined
 * in the UseXOP configuration.
 */
V128 ROL6464(V128 a, int r0, int r1)
{
    V128 a0 = ROL6464same(a, r0);
    V128 a1 = COPY64HI2LO(ROL6464same(a, r1));
    return GET64LOLO(a0, a1);
}
#endif
133 | |
134 | #include "KeccakF-1600-xop.macros" |
135 | |
136 | #ifdef UseBebigokimisa |
137 | #error "UseBebigokimisa cannot be used in combination with UseXOP" |
138 | #endif |
139 | #elif defined(UseMMX) |
140 | #include <mmintrin.h> |
/* UseMMX configuration: one 64-bit lane per __m64 value. */
typedef __m64 V64;
#define ANDnu64(a, b) _mm_andnot_si64(a, b)

/*
 * Load/store helpers. MSVC and the Intel compiler reinterpret the storage
 * through a V64 pointer; other compilers cast the value itself.
 */
#if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
#define LOAD64(a) *(V64*)&(a)
#define CONST64(a) *(V64*)&(a)
#define STORE64(a, b) *(V64*)&(a) = b
#else
#define LOAD64(a) (V64)a
#define CONST64(a) (V64)a
#define STORE64(a, b) a = (UINT64)b
#endif
/* ROL64 ORs a left shift by o with a right shift by 64-o (left rotation). */
#define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
#define XOR64(a, b) _mm_xor_si64(a, b)
/* XOReq64 assigns to its first argument. */
#define XOReq64(a, b) a = _mm_xor_si64(a, b)
156 | |
157 | #include "KeccakF-1600-simd64.macros" |
158 | |
159 | #ifdef UseBebigokimisa |
160 | #error "UseBebigokimisa cannot be used in combination with UseMMX" |
161 | #endif |
162 | #else |
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 *
 *  - MSVC: uses the _rotl64 intrinsic.
 *  - UseSHLD: GNU statement expression around an x86 "shld" instruction;
 *    the offset must be a compile-time constant ("i" constraint).
 *  - otherwise: portable shift/OR form. Both macro arguments are fully
 *    parenthesized so that expression arguments expand correctly, and the
 *    shift counts are reduced modulo 64 so that an offset of 0 returns the
 *    value unchanged instead of shifting by the full width (undefined
 *    behaviour in C). For offsets 1..63 the result is the same as the plain
 *    (a << offset) ^ (a >> (64-offset)) form.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
#define ROL64(x,N) ({ \
    register UINT64 rol64_out_; \
    register UINT64 rol64_in_ = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(rol64_out_) : "0"(rol64_in_), "i"(N)); \
    rol64_out_; \
})
#else
#define ROL64(a, offset) \
    ((((UINT64)(a)) << ((offset) & 63)) | (((UINT64)(a)) >> ((64 - (offset)) & 63)))
#endif
175 | |
176 | #include "KeccakF-1600-64.macros" |
177 | #endif |
178 | |
179 | #include "KeccakF-1600-unrolling.macros" |
180 | |
/*
 * Runs the permutation on a state held as an array of 64-bit words.
 *
 * The whole body is generated by macros from the included *.macros files:
 * declareABCDE introduces the working variables, copyFromState(A, state)
 * reads the state, and rounds expands to the round computation.
 * NOTE(review): presumably `rounds` also writes the result back into
 * `state` -- confirm in KeccakF-1600-unrolling.macros.
 */
static void KeccakPermutationOnWords(UINT64 *state)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled; presumably the
       loop counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromState(A, state)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
194 | |
195 | static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount) |
196 | { |
197 | declareABCDE |
198 | #if (Unrolling != 24) |
199 | unsigned int i; |
200 | #endif |
201 | unsigned int j; |
202 | |
203 | for(j=0; j<laneCount; j++) |
204 | state[j] ^= input[j]; |
205 | copyFromState(A, state) |
206 | rounds |
207 | #if defined(UseMMX) |
208 | _mm_empty(); |
209 | #endif |
210 | } |
211 | |
#ifdef ProvideFast576
/*
 * Fixed-size variant for a 576-bit input block, compiled only when
 * ProvideFast576 is defined. The body is macro-generated:
 * copyFromStateAndXor576bits(A, state, input) reads the state together with
 * the input, and rounds expands to the round computation.
 * NOTE(review): the exact load/XOR/store behaviour lives in the *.macros
 * files, which are not visible here.
 */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
#endif
227 | |
#ifdef ProvideFast832
/*
 * Fixed-size variant for an 832-bit input block, compiled only when
 * ProvideFast832 is defined. The body is macro-generated:
 * copyFromStateAndXor832bits(A, state, input) reads the state together with
 * the input, and rounds expands to the round computation.
 * NOTE(review): the exact load/XOR/store behaviour lives in the *.macros
 * files, which are not visible here.
 */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
#endif
243 | |
#ifdef ProvideFast1024
/*
 * Fixed-size variant for a 1024-bit input block, compiled only when
 * ProvideFast1024 is defined. The body is macro-generated:
 * copyFromStateAndXor1024bits(A, state, input) reads the state together with
 * the input, and rounds expands to the round computation.
 * NOTE(review): the exact load/XOR/store behaviour lives in the *.macros
 * files, which are not visible here.
 */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
#endif
259 | |
#ifdef ProvideFast1088
/*
 * Fixed-size variant for a 1088-bit input block, compiled only when
 * ProvideFast1088 is defined. The body is macro-generated:
 * copyFromStateAndXor1088bits(A, state, input) reads the state together with
 * the input, and rounds expands to the round computation.
 * NOTE(review): the exact load/XOR/store behaviour lives in the *.macros
 * files, which are not visible here.
 */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
#endif
275 | |
#ifdef ProvideFast1152
/*
 * Fixed-size variant for a 1152-bit input block, compiled only when
 * ProvideFast1152 is defined. The body is macro-generated:
 * copyFromStateAndXor1152bits(A, state, input) reads the state together with
 * the input, and rounds expands to the round computation.
 * NOTE(review): the exact load/XOR/store behaviour lives in the *.macros
 * files, which are not visible here.
 */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
#endif
291 | |
#ifdef ProvideFast1344
/*
 * Fixed-size variant for a 1344-bit input block, compiled only when
 * ProvideFast1344 is defined. The body is macro-generated:
 * copyFromStateAndXor1344bits(A, state, input) reads the state together with
 * the input, and rounds expands to the round computation.
 * NOTE(review): the exact load/XOR/store behaviour lives in the *.macros
 * files, which are not visible here.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Empty the MMX state after the MMX-based rounds. */
    _mm_empty();
#endif
}
#endif
307 | |
/*
 * Global initialisation hook. This implementation has nothing to set up, so
 * the body is intentionally empty.
 *
 * Declared with (void) so the definition is a real prototype: in C11 an
 * empty parameter list leaves the arguments unchecked.
 */
static void KeccakInitialize(void)
{
}
311 | |
/*
 * Resets a 200-byte state buffer.
 *
 * All 200 bytes are set to zero. When UseBebigokimisa is defined, the 64-bit
 * lanes 1, 2, 8, 12, 17 and 20 are then set to all-ones.
 *
 * state: buffer of at least 200 bytes. With UseBebigokimisa it is accessed
 *        through a UINT64 pointer, so it must be suitably aligned for that.
 */
static void KeccakInitializeState(unsigned char *state)
{
    memset(state, 0, 200);
#ifdef UseBebigokimisa
    ((UINT64*)state)[ 1] = ~(UINT64)0;
    ((UINT64*)state)[ 2] = ~(UINT64)0;
    ((UINT64*)state)[ 8] = ~(UINT64)0;
    ((UINT64*)state)[12] = ~(UINT64)0;
    ((UINT64*)state)[17] = ~(UINT64)0;
    ((UINT64*)state)[20] = ~(UINT64)0;
#endif
}
324 | |
325 | static void KeccakPermutation(unsigned char *state) |
326 | { |
327 | // We assume the state is always stored as words |
328 | KeccakPermutationOnWords(state: (UINT64*)state); |
329 | } |
330 | |
331 | #if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) |
332 | static void fromBytesToWord(UINT64 *word, const UINT8 *bytes) |
333 | { |
334 | unsigned int i; |
335 | |
336 | *word = 0; |
337 | for(i=0; i<(64/8); i++) |
338 | *word |= (UINT64)(bytes[i]) << (8*i); |
339 | } |
340 | #endif |
341 | |
#ifdef ProvideFast576
/*
 * Absorbs one 576-bit block (72 bytes, 9 lanes) by handing it to
 * KeccakPermutationOnWordsAfterXoring576bits.
 *
 * Little-endian: state and data are reinterpreted in place as 64-bit words
 * (data is assumed to be readable through a UINT64 pointer).
 * Otherwise: the 9 lanes are first converted from little-endian bytes with
 * fromBytesToWord.
 */
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 dataAsWords[9];
    unsigned int i;

    for(i=0; i<9; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, dataAsWords);
#endif
}
#endif
357 | |
#ifdef ProvideFast832
/*
 * Absorbs one 832-bit block (104 bytes, 13 lanes) by handing it to
 * KeccakPermutationOnWordsAfterXoring832bits.
 *
 * Little-endian: state and data are reinterpreted in place as 64-bit words
 * (data is assumed to be readable through a UINT64 pointer).
 * Otherwise: the 13 lanes are first converted from little-endian bytes with
 * fromBytesToWord.
 */
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 dataAsWords[13];
    unsigned int i;

    for(i=0; i<13; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, dataAsWords);
#endif
}
#endif
373 | |
#ifdef ProvideFast1024
/*
 * Absorbs one 1024-bit block (128 bytes, 16 lanes) by handing it to
 * KeccakPermutationOnWordsAfterXoring1024bits.
 *
 * Little-endian: state and data are reinterpreted in place as 64-bit words
 * (data is assumed to be readable through a UINT64 pointer).
 * Otherwise: the 16 lanes are first converted from little-endian bytes with
 * fromBytesToWord.
 */
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 dataAsWords[16];
    unsigned int i;

    for(i=0; i<16; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, dataAsWords);
#endif
}
#endif
389 | |
#ifdef ProvideFast1088
/*
 * Absorbs one 1088-bit block (136 bytes, 17 lanes) by handing it to
 * KeccakPermutationOnWordsAfterXoring1088bits.
 *
 * Little-endian: state and data are reinterpreted in place as 64-bit words
 * (data is assumed to be readable through a UINT64 pointer).
 * Otherwise: the 17 lanes are first converted from little-endian bytes with
 * fromBytesToWord.
 */
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 dataAsWords[17];
    unsigned int i;

    for(i=0; i<17; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, dataAsWords);
#endif
}
#endif
405 | |
#ifdef ProvideFast1152
/*
 * Absorbs one 1152-bit block (144 bytes, 18 lanes) by handing it to
 * KeccakPermutationOnWordsAfterXoring1152bits.
 *
 * Little-endian: state and data are reinterpreted in place as 64-bit words
 * (data is assumed to be readable through a UINT64 pointer).
 * Otherwise: the 18 lanes are first converted from little-endian bytes with
 * fromBytesToWord.
 */
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 dataAsWords[18];
    unsigned int i;

    for(i=0; i<18; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, dataAsWords);
#endif
}
#endif
421 | |
#ifdef ProvideFast1344
/*
 * Absorbs one 1344-bit block (168 bytes, 21 lanes) by handing it to
 * KeccakPermutationOnWordsAfterXoring1344bits.
 *
 * Little-endian: state and data are reinterpreted in place as 64-bit words
 * (data is assumed to be readable through a UINT64 pointer).
 * Otherwise: the 21 lanes are first converted from little-endian bytes with
 * fromBytesToWord.
 */
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 dataAsWords[21];
    unsigned int i;

    for(i=0; i<21; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, dataAsWords);
#endif
}
#endif
437 | |
438 | static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount) |
439 | { |
440 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) |
441 | KeccakPermutationOnWordsAfterXoring(state: (UINT64*)state, input: (const UINT64*)data, laneCount); |
442 | #else |
443 | UINT64 dataAsWords[25]; |
444 | unsigned int i; |
445 | |
446 | for(i=0; i<laneCount; i++) |
447 | fromBytesToWord(dataAsWords+i, data+(i*8)); |
448 | KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount); |
449 | #endif |
450 | } |
451 | |
452 | #if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) |
453 | static void fromWordToBytes(UINT8 *bytes, const UINT64 word) |
454 | { |
455 | unsigned int i; |
456 | |
457 | for(i=0; i<(64/8); i++) |
458 | bytes[i] = (word >> (8*i)) & 0xFF; |
459 | } |
460 | #endif |
461 | |
#ifdef ProvideFast1024
/*
 * Copies the first 1024 bits (128 bytes, 16 lanes) of the state into data.
 *
 * Little-endian: a plain 128-byte memcpy.
 * Otherwise: each lane is written out least significant byte first with
 * fromWordToBytes.
 * With UseBebigokimisa, lanes 1, 2, 8 and 12 of the output are complemented.
 * The complement is applied byte by byte, so data does not need to be
 * aligned for 64-bit access.
 *
 * NOTE(review): the function name was missing from the definition; it is
 * restored here as KeccakExtract1024bits, following the naming of
 * KeccakAbsorb1024bits -- confirm against KeccakF-1600-interface.h.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned int complementedLanes[4] = {1, 2, 8, 12};
        unsigned int k, b;

        for (k = 0; k < 4; k++)
            for (b = 0; b < 8; b++)
                data[complementedLanes[k]*8 + b] =
                    (unsigned char)~data[complementedLanes[k]*8 + b];
    }
#endif
}
#endif
481 | |
/*
 * Copies the first laneCount 64-bit lanes (laneCount*8 bytes) of the state
 * into data.
 *
 * Little-endian: a plain memcpy.
 * Otherwise: each lane is written out least significant byte first with
 * fromWordToBytes.
 * With UseBebigokimisa, those of lanes 1, 2, 8, 12, 17 and 20 that lie
 * within the first laneCount lanes are complemented in the output. The
 * complement is applied byte by byte, so data does not need to be aligned
 * for 64-bit access.
 *
 * NOTE(review): the function name was missing from the definition; it is
 * restored here as KeccakExtract, following the naming of KeccakAbsorb --
 * confirm against KeccakF-1600-interface.h.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* Ascending order, so the scan can stop at the first lane that was
           not extracted. */
        static const unsigned int complementedLanes[6] = {1, 2, 8, 12, 17, 20};
        unsigned int k, b;

        for (k = 0; k < 6; k++) {
            if (complementedLanes[k] >= laneCount)
                break;
            for (b = 0; b < 8; b++)
                data[complementedLanes[k]*8 + b] =
                    (unsigned char)~data[complementedLanes[k]*8 + b];
        }
    }
#endif
}
513 | |