/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

/* Arguments of sha256_ni_transform (registers in AT&T syntax) */
#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Base address of the K256 round-constant table */
#define SHA256CONSTANTS	%rax

/*
 * MSG has to be %xmm0: sha256rnds2 takes its message-plus-constant input
 * from %xmm0 implicitly, which is why that operand never appears in the
 * instruction text.
 */
#define MSG		%xmm0
#define STATE0		%xmm1	/* working state, ABEF order inside the loop */
#define STATE1		%xmm2	/* working state, CDGH order inside the loop */
#define MSGTMP0		%xmm3	/* MSGTMP0-3: message schedule words */
#define MSGTMP1		%xmm4
#define MSGTMP2		%xmm5
#define MSGTMP3		%xmm6
#define MSGTMP4		%xmm7	/* scratch */

#define SHUF_MASK	%xmm8	/* copy of PSHUFFLE_BYTE_FLIP_MASK */

#define ABEF_SAVE	%xmm9	/* STATE0 as it was at the start of the block */
#define CDGH_SAVE	%xmm10	/* STATE1 as it was at the start of the block */

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the buffer the digest pointer refers to is overwritten with
 * the resulting hash value.  The function only processes complete blocks,
 * there is no functionality to store partial blocks.  All message padding and
 * hash value initialization must be done outside the update function.
 *
 * Inside the do_4rounds macro, the indented lines are instructions related to
 * rounds processing.  The non-indented lines are instructions related to the
 * message schedule.
 *
 * void sha256_ni_transform(uint32_t *digest, const void *data,
 *			    uint32_t numBlocks);
 * digest : pointer to digest
 * data: pointer to input data
 * numBlocks: Number of blocks to process
 *
 * When numBlocks is zero the function returns immediately and the digest is
 * neither read nor written.
 *
 * Registers modified: %rax, %rdx, %rsi, %xmm0-%xmm10 and the flags.
 *
 * NOTE(review): the prototype above documents a 32-bit numBlocks, but the code
 * shifts and adds the full 64-bit %rdx.  This presumably relies on the caller
 * passing a value whose upper 32 bits are clear - confirm against the C
 * declaration.
 */

/*
 * do_4rounds - four SHA-256 rounds together with the message schedule work
 * that is interleaved with them.
 *
 *	i	number of the first of the four rounds (0, 4, ..., 60)
 *	m0	register for message words W[i..i+3]; filled from the input
 *		block when i < 16, otherwise it already holds them
 *	m1	register in which W[i+4..i+7] is completed (12 <= i < 60)
 *	m2	not referenced; listed so that every invocation names the
 *		whole four-register rotation
 *	m3	register holding W[i-4..i-1]; sha256msg1 starts turning it
 *		into W[i+12..i+15] (4 <= i < 52)
 *
 * The byte offset of round i's constants in K256, and of message word i in
 * the input block, is i*4.  MSGTMP4 is used as scratch.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	movdqu		\i*4(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG		/* big-endian words -> host order */
	movdqa		MSG, \m0
.else
	movdqa		\m0, MSG
.endif
		paddd		\i*4(SHA256CONSTANTS), MSG	/* W + K */
		sha256rnds2	STATE0, STATE1
.if \i >= 12 && \i < 60
	movdqa		\m0, MSGTMP4
	palignr		$4, \m3, MSGTMP4	/* W[i-3..i] */
	paddd		MSGTMP4, \m1
	sha256msg2	\m0, \m1
.endif
		pshufd		$0x0E, MSG, MSG	/* upper two W + K into low half */
		sha256rnds2	STATE1, STATE0
.if \i >= 4 && \i < 52
	sha256msg1	\m0, \m3
.endif
.endm

.text
SYM_TYPED_FUNC_START(sha256_ni_transform)

	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* zero blocks: nothing to do */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1, STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* Rounds 0-15: message words come straight from the input block */
	do_4rounds	 0, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	 4, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	 8, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	12, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2

	/* Rounds 16-63: message words come from the message schedule */
	do_4rounds	16, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	20, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	24, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	28, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2
	do_4rounds	32, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	36, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	40, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	44, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2
	do_4rounds	48, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	52, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	56, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	60, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/*
	 * Increment data pointer and loop if more to process.
	 * NUM_BLKS holds the end-of-data address at this point.
	 */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2), four per row.
 * The round code reads them 16 bytes at a time as a paddd memory operand,
 * which requires 16-byte alignment; the 64-byte alignment below provides it.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/*
 * pshufb control mask that reverses the byte order within each of the four
 * 32-bit lanes, turning the big-endian message words of the input into the
 * host's little-endian order.  It is loaded with movdqa, so it must stay
 * 16-byte aligned.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203


/* Source: linux/arch/x86/crypto/sha256_ni_asm.S */