1/*
2 * Intel SHA Extensions optimized implementation of a SHA-1 update function
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2015 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Sean Gulley <sean.m.gulley@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 * BSD LICENSE
25 *
26 * Copyright(c) 2015 Intel Corporation.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 *
54 */
55
56#include <linux/linkage.h>
57#include <linux/cfi_types.h>
58
/*
 * Argument registers (System V AMD64 calling convention).
 */
#define DIGEST_PTR	%rdi		/* arg 1: hash state */
#define DATA_PTR	%rsi		/* arg 2: input data */
#define NUM_BLKS	%rdx		/* arg 3: block count */

/*
 * Vector register allocation.  The E value alternates between E0 and E1
 * from one group of four rounds to the next, hence two registers.
 */
#define ABCD		%xmm0
#define E0		%xmm1
#define E1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define SHUF_MASK	%xmm7

/* Stack scratch area: two 16-byte slots. */
#define FRAME_SIZE	(2*16)
74
75
/*
 * sha1_ni_transform - SHA-1 update function using the Intel SHA Extensions
 *
 * void sha1_ni_transform(uint32_t *digest, const void *data,
 *			  uint32_t numBlocks)
 *	digest:    pointer to the five 32-bit hash words; read on entry and
 *		   overwritten with the new hash value on exit
 *	data:      pointer to the input data (loaded with unaligned moves)
 *	numBlocks: number of 64-byte blocks to process
 *
 * The function only processes complete 64-byte blocks; there is no
 * functionality to store partial blocks.  All message padding and hash value
 * initialization must be done outside this function.  If numBlocks is zero
 * the function returns without reading or writing *digest.
 *
 * Besides the SHA extensions the code uses pshufb (SSSE3) and pinsrd/pextrd
 * (SSE4.1).
 *
 * Clobbers: DATA_PTR, NUM_BLKS, %xmm0-%xmm7 and the flags.  %rbp is saved and
 * restored.  FRAME_SIZE bytes of 16-byte aligned stack are used to hold the
 * hash value that was current at the start of each block.
 *
 * The indented lines in the loop are instructions related to rounds
 * processing.  The non-indented lines are instructions related to the message
 * schedule.
 */
#define NUM_BLKS_32	%edx	/* low 32 bits of NUM_BLKS (%rdx) */

.text
SYM_TYPED_FUNC_START(sha1_ni_transform)
	push		%rbp
	mov		%rsp, %rbp
	sub		$FRAME_SIZE, %rsp
	and		$~0xF, %rsp		/* movdqa to the frame needs 16-byte alignment */

	/*
	 * numBlocks is a 32-bit argument, and the calling convention does not
	 * define bits 63:32 of the register that carries it.  Zero-extend it
	 * before doing 64-bit arithmetic so that stale upper bits can neither
	 * defeat the zero check nor corrupt the end-of-data pointer.
	 */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * Load the initial hash values.  The fifth word goes into the top
	 * dword of E0; the other dwords of E0 are not initialized, so they are
	 * masked off.  The first four words are reversed into the order the
	 * SHA instructions expect.
	 */
	pinsrd		$3, 1*16(DIGEST_PTR), E0
	movdqu		0*16(DIGEST_PTR), ABCD
	pand		UPPER_WORD_MASK(%rip), E0
	pshufd		$0x1B, ABCD, ABCD

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		E0, (0*16)(%rsp)
	movdqa		ABCD, (1*16)(%rsp)

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG0
	pshufb		SHUF_MASK, MSG0
		paddd		MSG0, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG1
	pshufb		SHUF_MASK, MSG1
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG1, MSG0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG2
	pshufb		SHUF_MASK, MSG2
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG3
	pshufb		SHUF_MASK, MSG3
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 16-19 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 20-23 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 24-27 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 28-31 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 32-35 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 36-39 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 40-43 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 44-47 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 48-51 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 52-55 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 56-59 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 60-63 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$3, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 64-67 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$3, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 68-71 (message schedule winds down from here on) */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$3, E1, ABCD
	pxor		MSG1, MSG3

	/* Rounds 72-75 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$3, E0, ABCD

	/* Rounds 76-79 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
		sha1rnds4	$3, E1, ABCD

	/* Add current hash values with previously saved */
	sha1nexte	(0*16)(%rsp), E0
	paddd		(1*16)(%rsp), ABCD

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, 0*16(DIGEST_PTR)
	pextrd		$3, E0, 1*16(DIGEST_PTR)

.Ldone_hash:
	mov		%rbp, %rsp
	pop		%rbp

	RET
SYM_FUNC_END(sha1_ni_transform)
295
/*
 * pshufb control vector: destination byte i takes source byte 15 - i, i.e. a
 * full 16-byte reversal.  Listed in memory order, lowest address first.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.balign 16
PSHUFFLE_BYTE_FLIP_MASK:
	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
300
/*
 * Mask that keeps only the most significant dword of a 128-bit register.
 * Listed in memory order, lowest address first.
 */
.section	.rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.balign 16
UPPER_WORD_MASK:
	.long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
305

/* Source: linux/arch/x86/crypto/sha1_ni_asm.S */