1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /*************************************************************************** |
3 | * Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> * |
4 | * * |
5 | ***************************************************************************/ |
6 | |
7 | .file "twofish-i586-asm.S" |
8 | .text |
9 | |
10 | #include <linux/linkage.h> |
11 | #include <asm/asm-offsets.h> |
12 | |
/*
 * Stack layout at function entry: return address at 0, followed by the
 * three arguments. Both functions in this file push four registers before
 * reading the arguments, so they add 16 to each of these offsets.
 */

#define in_blk    12  /* input byte array address parameter*/
#define out_blk   8  /* output byte array address parameter*/
#define ctx       4  /* Twofish context structure */

/* Byte offsets of the four 32-bit words a, b, c, d within a block */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/*
 * Structure of the crypto context struct, as byte offsets from its start.
 * The four tables are 1024 bytes apart (256 words of 4 bytes each); the
 * whitening keys occupy the 32 bytes between w and k.
 */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 ( word ) */
32 | |
/*
 * define a few register aliases to allow macro substitution
 *
 * The round macros receive R0..R3 as arguments and paste a suffix onto
 * them: D selects the full 32-bit register, B its low byte and H the
 * second byte (bits 8-15).
 */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh
50 | |
51 | |
/*
 * Input whitening: XOR one word of the first whitening key group into a
 * state register.
 *   reg	register holding the state word, updated in place
 *   base	register holding the context address
 *   slot	byte offset of the key word within the group
 */
#define input_whitening(reg,base,slot)\
	xorl	w+slot(base),	reg;
55 | |
/*
 * Output whitening: XOR one word of the second whitening key group
 * (starting 16 bytes after w) into a state register.
 *   reg	register holding the state word, updated in place
 *   base	register holding the context address
 *   slot	byte offset of the key word within the group
 */
#define output_whitening(reg,base,slot)\
	xorl	w+16+slot(base),	reg;
59 | |
/*
 * One encryption round (used for every round except the last).
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two key words relative to k
 * operations on a and b are interleaved to increase performance
 *
 * Register usage:
 *  - %ebp must hold the context address (base of the lookup tables).
 *  - d is pushed on entry so its register can accumulate the table
 *    lookups indexed by the bytes of b; %esi accumulates the lookups
 *    indexed by the bytes of a. %edi holds each table index and, after
 *    the pop, the saved value of d.
 *  - %esi and %edi are clobbered; the stack is balanced on exit.
 *
 * State on exit:
 *  - a has been rotated right by 16.
 *  - b has been rotated right by 16 + 15 = 31, i.e. left by 1.
 *  - c = rol(c ^ (A + B + key[round]), 15), where A and B are the
 *    accumulated lookups for a and b.
 *  - d = saved d ^ (A + 2*B + key[round + 4]).
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;
96 | |
/*
 * Final encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two key words relative to k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 *
 * The lookups and key additions are the same as in encrypt_round; only
 * the rotations differ:
 *  - b is rotated right by 16 twice, so it leaves with its entry value.
 *  - a is rotated right by 16 once.
 *  - c is rotated right by 1 after the xor (instead of left by 15).
 *
 * %ebp must hold the context address. %esi and %edi are clobbered; the
 * push of d is matched by the pop into %edi, so the stack is balanced.
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;
134 | |
/*
 * One decryption round (used for every round except the last).
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two key words relative to k
 * operations on a and b are interleaved to increase performance
 *
 * Register usage:
 *  - %ebp must hold the context address (base of the lookup tables).
 *  - c is pushed on entry so its register can accumulate the table
 *    lookups indexed by the bytes of a; %esi accumulates the lookups
 *    indexed by the bytes of b. %edi holds each table index and, after
 *    the pop, the saved value of c.
 *  - %esi and %edi are clobbered; the stack is balanced on exit.
 *
 * State on exit:
 *  - a has been rotated right by 16 + 15 = 31, i.e. left by 1.
 *  - b has been rotated right by 16.
 *  - c = saved c ^ (A + B + key[round]), where A and B are the
 *    accumulated lookups for a and b.
 *  - d = rol(d ^ (A + 2*B + key[round + 4]), 15).
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;
171 | |
/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two key words relative to k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 *
 * The lookups and key additions are the same as in decrypt_round; only
 * the rotations differ:
 *  - a is rotated right by 16 twice, so it leaves with its entry value.
 *  - b is rotated right by 16 once.
 *  - d is rotated right by 1 after the xor (instead of left by 15).
 *
 * %ebp must hold the context address. %esi and %edi are clobbered; the
 * push of c is matched by the pop into %edi, so the stack is balanced.
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;
209 | |
/*
 * twofish_enc_blk - encrypt one block of four 32-bit words
 *
 * Arguments are read from the stack: the context address at ctx, the
 * output address at out_blk and the input address at in_blk.
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax is set to 1
 * before returning.
 */
SYM_FUNC_START(twofish_enc_blk)
	/* save the registers that must survive this call */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* the four pushes above moved every stack argument up by 16 bytes */
	movl	in_blk+16(%esp),	%edi	/* edi = input address */
	movl	ctx+16(%esp),	%ebp	/* ebp = context address; it is the
					 * base of every table access below */

	/* load each word and whiten it right away */
	movl	(%edi),		%eax
	input_whitening(%eax,%ebp,a_offset)
	rorl	$16,		%eax	/* rounds expect a rotated by 16 */
	movl	b_offset(%edi),	%ebx
	input_whitening(%ebx,%ebp,b_offset)
	movl	c_offset(%edi),	%ecx
	input_whitening(%ecx,%ebp,c_offset)
	movl	d_offset(%edi),	%edx
	input_whitening(%edx,%ebp,d_offset)
	roll	$1,		%edx	/* rounds expect d already rol 1 */

	/* sixteen rounds; the register pairs swap roles every round */
	encrypt_round(R0,R1,R2,R3,0*8)
	encrypt_round(R2,R3,R0,R1,1*8)
	encrypt_round(R0,R1,R2,R3,2*8)
	encrypt_round(R2,R3,R0,R1,3*8)
	encrypt_round(R0,R1,R2,R3,4*8)
	encrypt_round(R2,R3,R0,R1,5*8)
	encrypt_round(R0,R1,R2,R3,6*8)
	encrypt_round(R2,R3,R0,R1,7*8)
	encrypt_round(R0,R1,R2,R3,8*8)
	encrypt_round(R2,R3,R0,R1,9*8)
	encrypt_round(R0,R1,R2,R3,10*8)
	encrypt_round(R2,R3,R0,R1,11*8)
	encrypt_round(R0,R1,R2,R3,12*8)
	encrypt_round(R2,R3,R0,R1,13*8)
	encrypt_round(R0,R1,R2,R3,14*8)
	encrypt_last_round(R2,R3,R0,R1,15*8)

	/*
	 * ecx/edx become output words a/b and eax/ebx become c/d.
	 * All whitening is done before the first store.
	 */
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	movl	out_blk+16(%esp),	%edi	/* edi = output address */
	movl	%ecx,		(%edi)
	movl	%edx,		b_offset(%edi)
	movl	%eax,		c_offset(%edi)
	movl	%ebx,		d_offset(%edi)

	/* restore in reverse order of the pushes */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	movl	$1,		%eax
	RET
SYM_FUNC_END(twofish_enc_blk)
265 | |
/*
 * twofish_dec_blk - decrypt one block of four 32-bit words
 *
 * Arguments are read from the stack: the context address at ctx, the
 * output address at out_blk and the input address at in_blk.
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax is set to 1
 * before returning.
 */
SYM_FUNC_START(twofish_dec_blk)
	/* save the registers that must survive this call */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* the four pushes above moved every stack argument up by 16 bytes */
	movl	in_blk+16(%esp),	%edi	/* edi = input address */
	movl	ctx+16(%esp),	%ebp	/* ebp = context address; it is the
					 * base of every table access below */

	/* load each word and undo the output whitening right away */
	movl	(%edi),		%eax
	output_whitening(%eax,%ebp,a_offset)
	movl	b_offset(%edi),	%ebx
	output_whitening(%ebx,%ebp,b_offset)
	rorl	$16,		%ebx	/* rounds expect b rotated by 16 */
	movl	c_offset(%edi),	%ecx
	output_whitening(%ecx,%ebp,c_offset)
	roll	$1,		%ecx	/* rounds expect c already rol 1 */
	movl	d_offset(%edi),	%edx
	output_whitening(%edx,%ebp,d_offset)

	/* sixteen rounds with the key offsets in descending order */
	decrypt_round(R0,R1,R2,R3,15*8)
	decrypt_round(R2,R3,R0,R1,14*8)
	decrypt_round(R0,R1,R2,R3,13*8)
	decrypt_round(R2,R3,R0,R1,12*8)
	decrypt_round(R0,R1,R2,R3,11*8)
	decrypt_round(R2,R3,R0,R1,10*8)
	decrypt_round(R0,R1,R2,R3,9*8)
	decrypt_round(R2,R3,R0,R1,8*8)
	decrypt_round(R0,R1,R2,R3,7*8)
	decrypt_round(R2,R3,R0,R1,6*8)
	decrypt_round(R0,R1,R2,R3,5*8)
	decrypt_round(R2,R3,R0,R1,4*8)
	decrypt_round(R0,R1,R2,R3,3*8)
	decrypt_round(R2,R3,R0,R1,2*8)
	decrypt_round(R0,R1,R2,R3,1*8)
	decrypt_last_round(R2,R3,R0,R1,0*8)

	/*
	 * ecx/edx become output words a/b and eax/ebx become c/d.
	 * All whitening is done before the first store.
	 */
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	movl	out_blk+16(%esp),	%edi	/* edi = output address */
	movl	%ecx,		(%edi)
	movl	%edx,		b_offset(%edi)
	movl	%eax,		c_offset(%edi)
	movl	%ebx,		d_offset(%edi)

	/* restore in reverse order of the pushes */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	movl	$1,		%eax
	RET
SYM_FUNC_END(twofish_dec_blk)
322 | |