1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) |
4 | * |
5 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
6 | * |
7 | * Based on crypto/serpent.c by |
8 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> |
9 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> |
10 | */ |
11 | |
12 | #include <linux/linkage.h> |
13 | |
14 | .file "serpent-sse2-x86_64-asm_64.S" |
15 | .text |
16 | |
/*
 * CTX aliases %rdi. None of the S-box macros below reference it.
 * NOTE(review): presumably the cipher context pointer arriving as the first
 * integer argument (SysV AMD64) -- confirm against the entry points.
 */
17 | #define CTX %rdi |
18 | |
19 | /********************************************************************** |
20 | 8-way SSE2 serpent |
21 | **********************************************************************/ |
/*
 * First register set: five aliases covering %xmm0-%xmm4.
 * NOTE(review): five names match the five parameters (x0..x4) taken by the
 * S-box macros below; presumably these are what gets passed -- confirm.
 */
22 | #define RA1 %xmm0 |
23 | #define RB1 %xmm1 |
24 | #define RC1 %xmm2 |
25 | #define RD1 %xmm3 |
26 | #define RE1 %xmm4 |
27 | |
/*
 * Second register set: five aliases covering %xmm5-%xmm9.
 * NOTE(review): presumably a second group of blocks processed alongside the
 * first set to reach the "8-way" width named in the banner -- confirm.
 */
28 | #define RA2 %xmm5 |
29 | #define RB2 %xmm6 |
30 | #define RC2 %xmm7 |
31 | #define RD2 %xmm8 |
32 | #define RE2 %xmm9 |
33 | |
/*
 * RNOT (%xmm10) is the source operand of every "pxor RNOT, reg" in the S-box
 * macros below. That is a bitwise complement only if the register holds
 * all-ones; its initialisation is not visible in this part of the file.
 */
34 | #define RNOT %xmm10 |
35 | |
/*
 * RK0-RK3 alias %xmm11-%xmm14. They are not used by the S-box macros below.
 * NOTE(review): the names suggest round-key words -- confirm where loaded.
 */
36 | #define RK0 %xmm11 |
37 | #define RK1 %xmm12 |
38 | #define RK2 %xmm13 |
39 | #define RK3 %xmm14 |
40 | |
/*
 * S0_1: opening half of the S0 transform (by its name and the file header,
 * Serpent S-box 0; S0_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, so its old value is ignored
 *   RNOT   pxor'ed into x4; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
41 | #define S0_1(x0, x1, x2, x3, x4) \ |
42 | movdqa x3, x4; \ |
43 | por x0, x3; \ |
44 | pxor x4, x0; \ |
45 | pxor x2, x4; \ |
46 | pxor RNOT, x4; \ |
47 | pxor x1, x3; \ |
48 | pand x0, x1; \ |
49 | pxor x4, x1; \ |
50 | pxor x0, x2; |
/*
 * S0_2: closing half of the S0 transform (presumably expanded right after
 * S0_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (it is read
 *          by "por x0, x4" without being reloaded first)
 *   RNOT   pxor'ed into x1; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
51 | #define S0_2(x0, x1, x2, x3, x4) \ |
52 | pxor x3, x0; \ |
53 | por x0, x4; \ |
54 | pxor x2, x0; \ |
55 | pand x1, x2; \ |
56 | pxor x2, x3; \ |
57 | pxor RNOT, x1; \ |
58 | pxor x4, x2; \ |
59 | pxor x2, x1; |
60 | |
/*
 * S1_1: opening half of the S1 transform (by its name and the file header,
 * Serpent S-box 1; S1_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0, x1, x3  read and updated in place
 *   x2          only read here
 *   x4          scratch: loaded by the leading movdqa, old value ignored
 *   RNOT        pxor'ed into x3; a complement only if RNOT is all-ones,
 *               which is not established in this part of the file
 */
61 | #define S1_1(x0, x1, x2, x3, x4) \ |
62 | movdqa x1, x4; \ |
63 | pxor x0, x1; \ |
64 | pxor x3, x0; \ |
65 | pxor RNOT, x3; \ |
66 | pand x1, x4; \ |
67 | por x1, x0; \ |
68 | pxor x2, x3; \ |
69 | pxor x3, x0; \ |
70 | pxor x3, x1; |
/*
 * S1_2: closing half of the S1 transform (presumably expanded right after
 * S1_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (the first
 *          instruction reads it)
 *   RNOT   pxor'ed into x0; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
71 | #define S1_2(x0, x1, x2, x3, x4) \ |
72 | pxor x4, x3; \ |
73 | por x4, x1; \ |
74 | pxor x2, x4; \ |
75 | pand x0, x2; \ |
76 | pxor x1, x2; \ |
77 | por x0, x1; \ |
78 | pxor RNOT, x0; \ |
79 | pxor x2, x0; \ |
80 | pxor x1, x4; |
81 | |
/*
 * S2_1: opening half of the S2 transform (by its name and the file header,
 * Serpent S-box 2; S2_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by movdqa (third instruction) before any read,
 *          so its old value is ignored
 *   RNOT   pxor'ed into x3 as the very first step; a complement only if RNOT
 *          is all-ones, which is not established in this part of the file
 */
82 | #define S2_1(x0, x1, x2, x3, x4) \ |
83 | pxor RNOT, x3; \ |
84 | pxor x0, x1; \ |
85 | movdqa x0, x4; \ |
86 | pand x2, x0; \ |
87 | pxor x3, x0; \ |
88 | por x4, x3; \ |
89 | pxor x1, x2; \ |
90 | pxor x1, x3; \ |
91 | pand x0, x1; |
/*
 * S2_2: closing half of the S2 transform (presumably expanded right after
 * S2_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (it is
 *          xor-updated, never reloaded)
 *   RNOT   pxor'ed into x0; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
92 | #define S2_2(x0, x1, x2, x3, x4) \ |
93 | pxor x2, x0; \ |
94 | pand x3, x2; \ |
95 | por x1, x3; \ |
96 | pxor RNOT, x0; \ |
97 | pxor x0, x3; \ |
98 | pxor x0, x4; \ |
99 | pxor x2, x0; \ |
100 | por x2, x1; |
101 | |
/*
 * S3_1: opening half of the S3 transform (by its name and the file header,
 * Serpent S-box 3; S3_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, old value ignored
 * RNOT is not used by this half.
 */
102 | #define S3_1(x0, x1, x2, x3, x4) \ |
103 | movdqa x1, x4; \ |
104 | pxor x3, x1; \ |
105 | por x0, x3; \ |
106 | pand x0, x4; \ |
107 | pxor x2, x0; \ |
108 | pxor x1, x2; \ |
109 | pand x3, x1; \ |
110 | pxor x3, x2; \ |
111 | por x4, x0; \ |
112 | pxor x3, x4; |
/*
 * S3_2: closing half of the S3 transform (presumably expanded right after
 * S3_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (first read
 *          by "pand x4, x3")
 * RNOT is not used by this half.
 */
113 | #define S3_2(x0, x1, x2, x3, x4) \ |
114 | pxor x0, x1; \ |
115 | pand x3, x0; \ |
116 | pand x4, x3; \ |
117 | pxor x2, x3; \ |
118 | por x1, x4; \ |
119 | pand x1, x2; \ |
120 | pxor x3, x4; \ |
121 | pxor x3, x0; \ |
122 | pxor x2, x3; |
123 | |
/*
 * S4_1: opening half of the S4 transform (by its name and the file header,
 * Serpent S-box 4; S4_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0, x2, x3  read and updated in place
 *   x1          only read here
 *   x4          scratch: loaded by the leading movdqa, old value ignored
 * RNOT is not used by this half.
 */
124 | #define S4_1(x0, x1, x2, x3, x4) \ |
125 | movdqa x3, x4; \ |
126 | pand x0, x3; \ |
127 | pxor x4, x0; \ |
128 | pxor x2, x3; \ |
129 | por x4, x2; \ |
130 | pxor x1, x0; \ |
131 | pxor x3, x4; \ |
132 | por x0, x2; \ |
133 | pxor x1, x2; |
/*
 * S4_2: closing half of the S4 transform (presumably expanded right after
 * S4_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x1-x4  read and updated; x4 is live on entry (first read by
 *          "pxor x4, x1")
 *   x0     only read here
 *   RNOT   pxor'ed into x1; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
134 | #define S4_2(x0, x1, x2, x3, x4) \ |
135 | pand x0, x1; \ |
136 | pxor x4, x1; \ |
137 | pand x2, x4; \ |
138 | pxor x3, x2; \ |
139 | pxor x0, x4; \ |
140 | por x1, x3; \ |
141 | pxor RNOT, x1; \ |
142 | pxor x0, x3; |
143 | |
/*
 * S5_1: opening half of the S5 transform (by its name and the file header,
 * Serpent S-box 5; S5_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, old value ignored
 *   RNOT   pxor'ed into x3; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
144 | #define S5_1(x0, x1, x2, x3, x4) \ |
145 | movdqa x1, x4; \ |
146 | por x0, x1; \ |
147 | pxor x1, x2; \ |
148 | pxor RNOT, x3; \ |
149 | pxor x0, x4; \ |
150 | pxor x2, x0; \ |
151 | pand x4, x1; \ |
152 | por x3, x4; \ |
153 | pxor x0, x4; |
/*
 * S5_2: closing half of the S5 transform (presumably expanded right after
 * S5_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     only read here (source of "pand x4, x2"), so it must be live on
 *          entry and leaves unchanged
 * RNOT is not used by this half.
 */
154 | #define S5_2(x0, x1, x2, x3, x4) \ |
155 | pand x3, x0; \ |
156 | pxor x3, x1; \ |
157 | pxor x2, x3; \ |
158 | pxor x1, x0; \ |
159 | pand x4, x2; \ |
160 | pxor x2, x1; \ |
161 | pand x0, x2; \ |
162 | pxor x2, x3; |
163 | |
/*
 * S6_1: opening half of the S6 transform (by its name and the file header,
 * Serpent S-box 6; S6_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, old value ignored
 *   RNOT   pxor'ed into x4; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
164 | #define S6_1(x0, x1, x2, x3, x4) \ |
165 | movdqa x1, x4; \ |
166 | pxor x0, x3; \ |
167 | pxor x2, x1; \ |
168 | pxor x0, x2; \ |
169 | pand x3, x0; \ |
170 | por x3, x1; \ |
171 | pxor RNOT, x4; \ |
172 | pxor x1, x0; \ |
173 | pxor x2, x1; |
/*
 * S6_2: closing half of the S6 transform (presumably expanded right after
 * S6_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x1-x4  read and updated; x4 is live on entry (the first instruction
 *          reads it)
 *   x0     only read here
 * RNOT is not used by this half.
 */
174 | #define S6_2(x0, x1, x2, x3, x4) \ |
175 | pxor x4, x3; \ |
176 | pxor x0, x4; \ |
177 | pand x0, x2; \ |
178 | pxor x1, x4; \ |
179 | pxor x3, x2; \ |
180 | pand x1, x3; \ |
181 | pxor x0, x3; \ |
182 | pxor x2, x1; |
183 | |
/*
 * S7_1: opening half of the S7 transform (by its name and the file header,
 * Serpent S-box 7; S7_2 is presumably expanded next on the same operands --
 * confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by movdqa (second instruction) before any read,
 *          so its old value is ignored; it captures x1 after the RNOT xor
 *   RNOT   pxor'ed into x1 and into x0; a complement only if RNOT is
 *          all-ones, which is not established in this part of the file
 */
184 | #define S7_1(x0, x1, x2, x3, x4) \ |
185 | pxor RNOT, x1; \ |
186 | movdqa x1, x4; \ |
187 | pxor RNOT, x0; \ |
188 | pand x2, x1; \ |
189 | pxor x3, x1; \ |
190 | por x4, x3; \ |
191 | pxor x2, x4; \ |
192 | pxor x3, x2; \ |
193 | pxor x0, x3; \ |
194 | por x1, x0; |
/*
 * S7_2: closing half of the S7 transform (presumably expanded right after
 * S7_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0, x2, x3, x4  read and updated; x4 is live on entry (first read by
 *                   "pxor x4, x0")
 *   x1              only read here
 * RNOT is not used by this half.
 */
195 | #define S7_2(x0, x1, x2, x3, x4) \ |
196 | pand x0, x2; \ |
197 | pxor x4, x0; \ |
198 | pxor x3, x4; \ |
199 | pand x0, x3; \ |
200 | pxor x1, x4; \ |
201 | pxor x4, x2; \ |
202 | pxor x1, x3; \ |
203 | por x0, x4; \ |
204 | pxor x1, x4; |
205 | |
/*
 * SI0_1: opening half of the SI0 transform (by its name, presumably the
 * inverse of Serpent S-box 0; SI0_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, old value ignored
 *   RNOT   pxor'ed into x0; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
206 | #define SI0_1(x0, x1, x2, x3, x4) \ |
207 | movdqa x3, x4; \ |
208 | pxor x0, x1; \ |
209 | por x1, x3; \ |
210 | pxor x1, x4; \ |
211 | pxor RNOT, x0; \ |
212 | pxor x3, x2; \ |
213 | pxor x0, x3; \ |
214 | pand x1, x0; \ |
215 | pxor x2, x0; |
/*
 * SI0_2: closing half of the SI0 transform (presumably expanded right after
 * SI0_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (first read
 *          by "pxor x4, x3")
 * RNOT is not used by this half.
 */
216 | #define SI0_2(x0, x1, x2, x3, x4) \ |
217 | pand x3, x2; \ |
218 | pxor x4, x3; \ |
219 | pxor x3, x2; \ |
220 | pxor x3, x1; \ |
221 | pand x0, x3; \ |
222 | pxor x0, x1; \ |
223 | pxor x2, x0; \ |
224 | pxor x3, x4; |
225 | |
/*
 * SI1_1: opening half of the SI1 transform (by its name, presumably the
 * inverse of Serpent S-box 1; SI1_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by movdqa (second instruction) before any read,
 *          so its old value is ignored
 *   RNOT   pxor'ed into x2; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
226 | #define SI1_1(x0, x1, x2, x3, x4) \ |
227 | pxor x3, x1; \ |
228 | movdqa x0, x4; \ |
229 | pxor x2, x0; \ |
230 | pxor RNOT, x2; \ |
231 | por x1, x4; \ |
232 | pxor x3, x4; \ |
233 | pand x1, x3; \ |
234 | pxor x2, x1; \ |
235 | pand x4, x2; |
/*
 * SI1_2: closing half of the SI1 transform (presumably expanded right after
 * SI1_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (the first
 *          instruction xors into it without reloading)
 * RNOT is not used by this half.
 */
236 | #define SI1_2(x0, x1, x2, x3, x4) \ |
237 | pxor x1, x4; \ |
238 | por x3, x1; \ |
239 | pxor x0, x3; \ |
240 | pxor x0, x2; \ |
241 | por x4, x0; \ |
242 | pxor x4, x2; \ |
243 | pxor x0, x1; \ |
244 | pxor x1, x4; |
245 | |
/*
 * SI2_1: opening half of the SI2 transform (by its name, presumably the
 * inverse of Serpent S-box 2; SI2_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x1-x3  read and updated in place
 *   x0     only read here
 *   x4     scratch: loaded by movdqa (second instruction) before any read,
 *          so its old value is ignored
 *   RNOT   pxor'ed into x3; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
246 | #define SI2_1(x0, x1, x2, x3, x4) \ |
247 | pxor x1, x2; \ |
248 | movdqa x3, x4; \ |
249 | pxor RNOT, x3; \ |
250 | por x2, x3; \ |
251 | pxor x4, x2; \ |
252 | pxor x0, x4; \ |
253 | pxor x1, x3; \ |
254 | por x2, x1; \ |
255 | pxor x0, x2; |
/*
 * SI2_2: closing half of the SI2 transform (presumably expanded right after
 * SI2_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x1-x4  read and updated; x4 is live on entry (the first instruction
 *          reads it)
 *   x0     only read here
 * RNOT is not used by this half.
 */
256 | #define SI2_2(x0, x1, x2, x3, x4) \ |
257 | pxor x4, x1; \ |
258 | por x3, x4; \ |
259 | pxor x3, x2; \ |
260 | pxor x2, x4; \ |
261 | pand x1, x2; \ |
262 | pxor x3, x2; \ |
263 | pxor x4, x3; \ |
264 | pxor x0, x4; |
265 | |
/*
 * SI3_1: opening half of the SI3 transform (by its name, presumably the
 * inverse of Serpent S-box 3; SI3_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by movdqa (second instruction) before any read,
 *          so its old value is ignored
 * RNOT is not used by this half.
 */
266 | #define SI3_1(x0, x1, x2, x3, x4) \ |
267 | pxor x1, x2; \ |
268 | movdqa x1, x4; \ |
269 | pand x2, x1; \ |
270 | pxor x0, x1; \ |
271 | por x4, x0; \ |
272 | pxor x3, x4; \ |
273 | pxor x3, x0; \ |
274 | por x1, x3; \ |
275 | pxor x2, x1; |
/*
 * SI3_2: closing half of the SI3 transform (presumably expanded right after
 * SI3_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (it is
 *          xor-updated by "pxor x3, x4", never reloaded)
 * RNOT is not used by this half.
 */
276 | #define SI3_2(x0, x1, x2, x3, x4) \ |
277 | pxor x3, x1; \ |
278 | pxor x2, x0; \ |
279 | pxor x3, x2; \ |
280 | pand x1, x3; \ |
281 | pxor x0, x1; \ |
282 | pand x2, x0; \ |
283 | pxor x3, x4; \ |
284 | pxor x0, x3; \ |
285 | pxor x1, x0; |
286 | |
/*
 * SI4_1: opening half of the SI4 transform (by its name, presumably the
 * inverse of Serpent S-box 4; SI4_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x2  read and updated in place
 *   x3     only read here
 *   x4     scratch: loaded by movdqa (second instruction) before any read,
 *          so its old value is ignored
 *   RNOT   pxor'ed into x4; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
287 | #define SI4_1(x0, x1, x2, x3, x4) \ |
288 | pxor x3, x2; \ |
289 | movdqa x0, x4; \ |
290 | pand x1, x0; \ |
291 | pxor x2, x0; \ |
292 | por x3, x2; \ |
293 | pxor RNOT, x4; \ |
294 | pxor x0, x1; \ |
295 | pxor x2, x0; \ |
296 | pand x4, x2; |
/*
 * SI4_2: closing half of the SI4 transform (presumably expanded right after
 * SI4_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (first read
 *          by "por x4, x0")
 * RNOT is not used by this half.
 */
297 | #define SI4_2(x0, x1, x2, x3, x4) \ |
298 | pxor x0, x2; \ |
299 | por x4, x0; \ |
300 | pxor x3, x0; \ |
301 | pand x2, x3; \ |
302 | pxor x3, x4; \ |
303 | pxor x1, x3; \ |
304 | pand x0, x1; \ |
305 | pxor x1, x4; \ |
306 | pxor x3, x0; |
307 | |
/*
 * SI5_1: opening half of the SI5 transform (by its name, presumably the
 * inverse of Serpent S-box 5; SI5_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, old value ignored; it is
 *          only read after that and keeps the original x1
 *   RNOT   pxor'ed into x0; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
308 | #define SI5_1(x0, x1, x2, x3, x4) \ |
309 | movdqa x1, x4; \ |
310 | por x2, x1; \ |
311 | pxor x4, x2; \ |
312 | pxor x3, x1; \ |
313 | pand x4, x3; \ |
314 | pxor x3, x2; \ |
315 | por x0, x3; \ |
316 | pxor RNOT, x0; \ |
317 | pxor x2, x3; \ |
318 | por x0, x2; |
/*
 * SI5_2: closing half of the SI5 transform (presumably expanded right after
 * SI5_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0, x1, x2, x4  read and updated; x4 is live on entry (the first
 *                   instruction xors into it without reloading)
 *   x3              only read here
 * RNOT is not used by this half.
 */
319 | #define SI5_2(x0, x1, x2, x3, x4) \ |
320 | pxor x1, x4; \ |
321 | pxor x4, x2; \ |
322 | pand x0, x4; \ |
323 | pxor x1, x0; \ |
324 | pxor x3, x1; \ |
325 | pand x2, x0; \ |
326 | pxor x3, x2; \ |
327 | pxor x2, x0; \ |
328 | pxor x4, x2; \ |
329 | pxor x3, x4; |
330 | |
/*
 * SI6_1: opening half of the SI6 transform (by its name, presumably the
 * inverse of Serpent S-box 6; SI6_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0, x2, x3  read and updated in place
 *   x1          only read here
 *   x4          scratch: loaded by movdqa (second instruction) before any
 *               read, so its old value is ignored
 * RNOT is not used by this half.
 */
331 | #define SI6_1(x0, x1, x2, x3, x4) \ |
332 | pxor x2, x0; \ |
333 | movdqa x0, x4; \ |
334 | pand x3, x0; \ |
335 | pxor x3, x2; \ |
336 | pxor x2, x0; \ |
337 | pxor x1, x3; \ |
338 | por x4, x2; \ |
339 | pxor x3, x2; \ |
340 | pand x0, x3; |
/*
 * SI6_2: closing half of the SI6 transform (presumably expanded right after
 * SI6_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x4  all five are read and updated; x4 is live on entry (it is
 *          xor-updated by "pxor x0, x4", never reloaded)
 *   RNOT   pxor'ed into x0 as the first step; a complement only if RNOT is
 *          all-ones, which is not established in this part of the file
 */
341 | #define SI6_2(x0, x1, x2, x3, x4) \ |
342 | pxor RNOT, x0; \ |
343 | pxor x1, x3; \ |
344 | pand x2, x1; \ |
345 | pxor x0, x4; \ |
346 | pxor x4, x3; \ |
347 | pxor x2, x4; \ |
348 | pxor x1, x0; \ |
349 | pxor x0, x2; |
350 | |
/*
 * SI7_1: opening half of the SI7 transform (by its name, presumably the
 * inverse of Serpent S-box 7; SI7_2 is presumably expanded next on the same
 * operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x0-x3  read and updated in place
 *   x4     scratch: loaded by the leading movdqa, old value ignored
 *   RNOT   pxor'ed into x0; a complement only if RNOT is all-ones, which is
 *          not established in this part of the file
 */
351 | #define SI7_1(x0, x1, x2, x3, x4) \ |
352 | movdqa x3, x4; \ |
353 | pand x0, x3; \ |
354 | pxor x2, x0; \ |
355 | por x4, x2; \ |
356 | pxor x1, x4; \ |
357 | pxor RNOT, x0; \ |
358 | por x3, x1; \ |
359 | pxor x0, x4; \ |
360 | pand x2, x0; \ |
361 | pxor x1, x0; |
/*
 * SI7_2: closing half of the SI7 transform (presumably expanded right after
 * SI7_1 with the same operands -- confirm at the call sites).
 *
 * AT&T operand order: "op src, dst" updates dst in place.
 *   x1-x4  read and updated; x4 is live on entry (it is xor-updated by
 *          "pxor x3, x4", never reloaded)
 *   x0     only read here
 * RNOT is not used by this half.
 */
362 | #define SI7_2(x0, x1, x2, x3, x4) \ |
363 | pand x2, x1; \ |
364 | pxor x2, x3; \ |
365 | pxor x3, x4; \ |
366 | pand x3, x2; \ |
367 | por x0, x3; \ |
368 | pxor x4, x1; \ |
369 | pxor x4, x3; \ |
370 | pand x0, x4; \ |
371 | pxor x2, x4; |
372 | |
373 | #define get_key(i, j, t) \ |
374 | movd ( |
---|