sha1.c source code [libiberty/sha1.c]

/* sha1.c - Functions to compute SHA1 message digest of files or
   memory blocks according to the NIST specification FIPS-180-1.

   Copyright (C) 2000-2026 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/* Written by Scott G. Miller
   Credits:
      Robert Klep <robert@ilse.nl>  -- Expansion function fix
*/
24
25	#include <config.h>
26
27	#include "sha1.h"
28
29	#include <stddef.h>
30	#include <string.h>
31
32	#ifdef HAVE_X86_SHA1_HW_SUPPORT
33	# include <x86intrin.h>
34	# include <cpuid.h>
35	#endif
36
37	#if USE_UNLOCKED_IO
38	# include "unlocked-io.h"
39	#endif
40
/* SWAP (n) converts the 32-bit value N between host byte order and
   big-endian byte order.  On big-endian hosts it is the identity;
   otherwise it reverses the four bytes.  N is evaluated several times,
   so it must be free of side effects, and it should have an unsigned
   32-bit type so that bits shifted beyond bit 31 are discarded.  */
#ifdef WORDS_BIGENDIAN
# define SWAP(n) (n)
#else
# define SWAP(n) \
    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
#endif

/* Number of bytes sha1_stream reads before handing the data to
   sha1_process_block.  It must be a multiple of the 64-byte SHA1 block
   size, which the check below enforces at compile time.  */
#define BLOCKSIZE 4096
#if BLOCKSIZE % 64 != 0
# error "invalid BLOCKSIZE"
#endif
52
/* This array contains the bytes used to pad the buffer to the next
   64-byte boundary: a single 0x80 byte (the mandatory '1' bit) followed
   by zero bytes.  (FIPS 180-1, section 4; the same scheme as
   RFC 1321, 3.1: Step 1.)  */
static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };
56
57
58	/ Take a pointer to a 160 bit block of data (five 32 bit ints) and*
59	initialize it to the start constants of the SHA1 algorithm. This
60	must be called before using hash in the call to sha1_hash. /*
61	void
62	sha1_init_ctx (struct sha1_ctx *ctx)
63	{
64	ctx->A = `0x67452301`;
65	ctx->B = `0xefcdab89`;
66	ctx->C = `0x98badcfe`;
67	ctx->D = `0x10325476`;
68	ctx->E = `0xc3d2e1f0`;
69
70	ctx->total[`0`] = ctx->total[`1`] = `0`;
71	ctx->buflen = `0`;
72	}
73
74	/ Put result from CTX in first 20 bytes following RESBUF. The result*
75	must be in little endian byte order.
76
77	IMPORTANT: On some systems it is required that RESBUF is correctly
78	aligned for a 32-bit value. /*
79	void *
80	sha1_read_ctx (const struct sha1_ctx ctx, void* *resbuf)
81	{
82	((sha1_uint32 *) resbuf)[`0`] = SWAP (ctx->A);
83	((sha1_uint32 *) resbuf)[`1`] = SWAP (ctx->B);
84	((sha1_uint32 *) resbuf)[`2`] = SWAP (ctx->C);
85	((sha1_uint32 *) resbuf)[`3`] = SWAP (ctx->D);
86	((sha1_uint32 *) resbuf)[`4`] = SWAP (ctx->E);
87
88	return resbuf;
89	}
90
91	/ Process the remaining bytes in the internal buffer and the usual*
92	prolog according to the standard and write the result to RESBUF.
93
94	IMPORTANT: On some systems it is required that RESBUF is correctly
95	aligned for a 32-bit value. /*
96	void *
97	sha1_finish_ctx (struct sha1_ctx ctx, void* *resbuf)
98	{
99	/ Take yet unprocessed bytes into account. /
100	sha1_uint32 bytes = ctx->buflen;
101	size_t size = (bytes < `56`) ? `64` / `4` : `64` * `2` / `4`;
102
103	/ Now count remaining bytes. /
104	ctx->total[`0`] += bytes;
105	if (ctx->total[`0`] < bytes)
106	++ctx->total[`1`];
107
108	/ Put the 64-bit file length in bits at the end of the buffer. /
109	ctx->buffer[size - `2`] = SWAP ((ctx->total[`1`] << `3`) \| (ctx->total[`0`] >> `29`));
110	ctx->buffer[size - `1`] = SWAP (ctx->total[`0`] << `3`);
111
112	memcpy (dest: &((char ) ctx->buffer)[bytes], src: fillbuf, n: (size - `2`) `4` - bytes);
113
114	/ Process last bytes. /
115	sha1_process_block (buffer: ctx->buffer, len: size * `4`, ctx);
116
117	return sha1_read_ctx (ctx, resbuf);
118	}
119
120	/ Compute SHA1 message digest for bytes read from STREAM. The*
121	resulting message digest number will be written into the 16 bytes
122	beginning at RESBLOCK. /*
123	int
124	sha1_stream (FILE stream, void* *resblock)
125	{
126	struct sha1_ctx ctx;
127	char buffer[BLOCKSIZE + `72`];
128	size_t sum;
129
130	/ Initialize the computation context. /
131	sha1_init_ctx (ctx: &ctx);
132
133	/ Iterate over full file contents. /
134	while (`1`)
135	{
136	/ We read the file in blocks of BLOCKSIZE bytes. One call of the*
137	computation function processes the whole buffer so that with the
138	next round of the loop another block can be read. /*
139	size_t n;
140	sum = `0`;
141
142	/ Read block. Take care for partial reads. /
143	while (`1`)
144	{
145	n = fread (ptr: buffer + sum, size: `1`, BLOCKSIZE - sum, stream: stream);
146
147	sum += n;
148
149	if (sum == BLOCKSIZE)
150	break;
151
152	if (n == `0`)
153	{
154	/ Check for the error flag IFF N == 0, so that we don't*
155	exit the loop after a partial read due to e.g., EAGAIN
156	or EWOULDBLOCK. /*
157	if (ferror (stream: stream))
158	return `1`;
159	goto process_partial_block;
160	}
161
162	/ We've read at least one byte, so ignore errors. But always*
163	check for EOF, since feof may be true even though N > 0.
164	Otherwise, we could end up calling fread after EOF. /*
165	if (feof (stream: stream))
166	goto process_partial_block;
167	}
168
169	/ Process buffer with BLOCKSIZE bytes. Note that*
170	BLOCKSIZE % 64 == 0
171	*/
172	sha1_process_block (buffer, BLOCKSIZE, ctx: &ctx);
173	}
174
175	process_partial_block:;
176
177	/ Process any remaining bytes. /
178	if (sum > `0`)
179	sha1_process_bytes (buffer, len: sum, ctx: &ctx);
180
181	/ Construct result in desired memory. /
182	sha1_finish_ctx (ctx: &ctx, resbuf: resblock);
183	return `0`;
184	}
185
186	/ Compute SHA1 message digest for LEN bytes beginning at BUFFER. The*
187	result is always in little endian byte order, so that a byte-wise
188	output yields to the wanted ASCII representation of the message
189	digest. /*
190	void *
191	sha1_buffer (const char buffer, size_t len, void* *resblock)
192	{
193	struct sha1_ctx ctx;
194
195	/ Initialize the computation context. /
196	sha1_init_ctx (ctx: &ctx);
197
198	/ Process whole buffer but last len % 64 bytes. /
199	sha1_process_bytes (buffer, len, ctx: &ctx);
200
201	/ Put result in desired memory area. /
202	return sha1_finish_ctx (ctx: &ctx, resbuf: resblock);
203	}
204
205	void
206	sha1_process_bytes (const void buffer, size_t len, struct* sha1_ctx *ctx)
207	{
208	/ When we already have some bits in our internal buffer concatenate*
209	both inputs first. /*
210	if (ctx->buflen != `0`)
211	{
212	size_t left_over = ctx->buflen;
213	size_t add = `128` - left_over > len ? len : `128` - left_over;
214
215	memcpy (dest: &((char *) ctx->buffer)[left_over], src: buffer, n: add);
216	ctx->buflen += add;
217
218	if (ctx->buflen > `64`)
219	{
220	sha1_process_block (buffer: ctx->buffer, len: ctx->buflen & ~`63`, ctx);
221
222	ctx->buflen &= `63`;
223	/ The regions in the following copy operation cannot overlap. /
224	memcpy (dest: ctx->buffer,
225	src: &((char *) ctx->buffer)[(left_over + add) & ~`63`],
226	n: ctx->buflen);
227	}
228
229	buffer = (const char *) buffer + add;
230	len -= add;
231	}
232
233	/ Process available complete blocks. /
234	if (len >= `64`)
235	{
236	#if !_STRING_ARCH_unaligned
237	# define alignof(type) offsetof (struct { char c; type x; }, x)
238	# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
239	if (UNALIGNED_P (buffer))
240	while (len > `64`)
241	{
242	sha1_process_block (buffer: memcpy (dest: ctx->buffer, src: buffer, n: `64`), len: `64`, ctx);
243	buffer = (const char *) buffer + `64`;
244	len -= `64`;
245	}
246	else
247	#endif
248	{
249	sha1_process_block (buffer, len: len & ~`63`, ctx);
250	buffer = (const char *) buffer + (len & ~`63`);
251	len &= `63`;
252	}
253	}
254
255	/ Move remaining bytes in internal buffer. /
256	if (len > `0`)
257	{
258	size_t left_over = ctx->buflen;
259
260	memcpy (dest: &((char *) ctx->buffer)[left_over], src: buffer, n: len);
261	left_over += len;
262	if (left_over >= `64`)
263	{
264	sha1_process_block (buffer: ctx->buffer, len: `64`, ctx);
265	left_over -= `64`;
266	memmove (dest: ctx->buffer, src: &ctx->buffer[`16`], n: left_over);
267	}
268	ctx->buflen = left_over;
269	}
270	}
271
/* --- Code below is the primary difference between md5.c and sha1.c --- */

/* SHA1 round constants, one per group of 20 rounds.  */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

/* Round functions.  Note that F2 is the same as F4.
   F1 selects C where B has a 1 bit and D elsewhere; F2/F4 are parity;
   F3 is the bitwise majority of B, C and D.
   Every argument is parenthesized so that an expression argument such
   as `p | q' cannot regroup with the operators inside the macro.  */
#define F1(B,C,D) ( (D) ^ ( (B) & ( (C) ^ (D) ) ) )
#define F2(B,C,D) ((B) ^ (C) ^ (D))
#define F3(B,C,D) ( ( (B) & (C) ) | ( (D) & ( (B) | (C) ) ) )
#define F4(B,C,D) ((B) ^ (C) ^ (D))
285
286	/ Process LEN bytes of BUFFER, accumulating context into CTX.*
287	It is assumed that LEN % 64 == 0.
288	Most of this code comes from GnuPG's cipher/sha1.c. /*
289
290	void
291	sha1_process_block (const void buffer, size_t len, struct* sha1_ctx *ctx)
292	{
293	const sha1_uint32 words = (const* sha1_uint32*) buffer;
294	size_t nwords = len / sizeof (sha1_uint32);
295	const sha1_uint32 *endp = words + nwords;
296	sha1_uint32 x[`16`];
297	sha1_uint32 a = ctx->A;
298	sha1_uint32 b = ctx->B;
299	sha1_uint32 c = ctx->C;
300	sha1_uint32 d = ctx->D;
301	sha1_uint32 e = ctx->E;
302
303	/ First increment the byte count. RFC 1321 specifies the possible*
304	length of the file up to 2^64 bits. Here we only compute the
305	number of bytes. Do a double word increment. /*
306	ctx->total[`0`] += len;
307	ctx->total[`1`] += ((len >> `31`) >> `1`) + (ctx->total[`0`] < len);
308
309	#define rol(x, n) (((x) << (n)) \| ((sha1_uint32) (x) >> (32 - (n))))
310
311	#define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \
312	^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
313	, (x[I&0x0f] = rol(tm, 1)) )
314
315	#define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \
316	+ F( B, C, D ) \
317	+ K \
318	+ M; \
319	B = rol( B, 30 ); \
320	} while(0)
321
322	while (words < endp)
323	{
324	sha1_uint32 tm;
325	int t;
326	for (t = `0`; t < `16`; t++)
327	{
328	x[t] = SWAP (*words);
329	words++;
330	}
331
332	R( a, b, c, d, e, F1, K1, x[ `0`] );
333	R( e, a, b, c, d, F1, K1, x[ `1`] );
334	R( d, e, a, b, c, F1, K1, x[ `2`] );
335	R( c, d, e, a, b, F1, K1, x[ `3`] );
336	R( b, c, d, e, a, F1, K1, x[ `4`] );
337	R( a, b, c, d, e, F1, K1, x[ `5`] );
338	R( e, a, b, c, d, F1, K1, x[ `6`] );
339	R( d, e, a, b, c, F1, K1, x[ `7`] );
340	R( c, d, e, a, b, F1, K1, x[ `8`] );
341	R( b, c, d, e, a, F1, K1, x[ `9`] );
342	R( a, b, c, d, e, F1, K1, x[`10`] );
343	R( e, a, b, c, d, F1, K1, x[`11`] );
344	R( d, e, a, b, c, F1, K1, x[`12`] );
345	R( c, d, e, a, b, F1, K1, x[`13`] );
346	R( b, c, d, e, a, F1, K1, x[`14`] );
347	R( a, b, c, d, e, F1, K1, x[`15`] );
348	R( e, a, b, c, d, F1, K1, M(`16`) );
349	R( d, e, a, b, c, F1, K1, M(`17`) );
350	R( c, d, e, a, b, F1, K1, M(`18`) );
351	R( b, c, d, e, a, F1, K1, M(`19`) );
352	R( a, b, c, d, e, F2, K2, M(`20`) );
353	R( e, a, b, c, d, F2, K2, M(`21`) );
354	R( d, e, a, b, c, F2, K2, M(`22`) );
355	R( c, d, e, a, b, F2, K2, M(`23`) );
356	R( b, c, d, e, a, F2, K2, M(`24`) );
357	R( a, b, c, d, e, F2, K2, M(`25`) );
358	R( e, a, b, c, d, F2, K2, M(`26`) );
359	R( d, e, a, b, c, F2, K2, M(`27`) );
360	R( c, d, e, a, b, F2, K2, M(`28`) );
361	R( b, c, d, e, a, F2, K2, M(`29`) );
362	R( a, b, c, d, e, F2, K2, M(`30`) );
363	R( e, a, b, c, d, F2, K2, M(`31`) );
364	R( d, e, a, b, c, F2, K2, M(`32`) );
365	R( c, d, e, a, b, F2, K2, M(`33`) );
366	R( b, c, d, e, a, F2, K2, M(`34`) );
367	R( a, b, c, d, e, F2, K2, M(`35`) );
368	R( e, a, b, c, d, F2, K2, M(`36`) );
369	R( d, e, a, b, c, F2, K2, M(`37`) );
370	R( c, d, e, a, b, F2, K2, M(`38`) );
371	R( b, c, d, e, a, F2, K2, M(`39`) );
372	R( a, b, c, d, e, F3, K3, M(`40`) );
373	R( e, a, b, c, d, F3, K3, M(`41`) );
374	R( d, e, a, b, c, F3, K3, M(`42`) );
375	R( c, d, e, a, b, F3, K3, M(`43`) );
376	R( b, c, d, e, a, F3, K3, M(`44`) );
377	R( a, b, c, d, e, F3, K3, M(`45`) );
378	R( e, a, b, c, d, F3, K3, M(`46`) );
379	R( d, e, a, b, c, F3, K3, M(`47`) );
380	R( c, d, e, a, b, F3, K3, M(`48`) );
381	R( b, c, d, e, a, F3, K3, M(`49`) );
382	R( a, b, c, d, e, F3, K3, M(`50`) );
383	R( e, a, b, c, d, F3, K3, M(`51`) );
384	R( d, e, a, b, c, F3, K3, M(`52`) );
385	R( c, d, e, a, b, F3, K3, M(`53`) );
386	R( b, c, d, e, a, F3, K3, M(`54`) );
387	R( a, b, c, d, e, F3, K3, M(`55`) );
388	R( e, a, b, c, d, F3, K3, M(`56`) );
389	R( d, e, a, b, c, F3, K3, M(`57`) );
390	R( c, d, e, a, b, F3, K3, M(`58`) );
391	R( b, c, d, e, a, F3, K3, M(`59`) );
392	R( a, b, c, d, e, F4, K4, M(`60`) );
393	R( e, a, b, c, d, F4, K4, M(`61`) );
394	R( d, e, a, b, c, F4, K4, M(`62`) );
395	R( c, d, e, a, b, F4, K4, M(`63`) );
396	R( b, c, d, e, a, F4, K4, M(`64`) );
397	R( a, b, c, d, e, F4, K4, M(`65`) );
398	R( e, a, b, c, d, F4, K4, M(`66`) );
399	R( d, e, a, b, c, F4, K4, M(`67`) );
400	R( c, d, e, a, b, F4, K4, M(`68`) );
401	R( b, c, d, e, a, F4, K4, M(`69`) );
402	R( a, b, c, d, e, F4, K4, M(`70`) );
403	R( e, a, b, c, d, F4, K4, M(`71`) );
404	R( d, e, a, b, c, F4, K4, M(`72`) );
405	R( c, d, e, a, b, F4, K4, M(`73`) );
406	R( b, c, d, e, a, F4, K4, M(`74`) );
407	R( a, b, c, d, e, F4, K4, M(`75`) );
408	R( e, a, b, c, d, F4, K4, M(`76`) );
409	R( d, e, a, b, c, F4, K4, M(`77`) );
410	R( c, d, e, a, b, F4, K4, M(`78`) );
411	R( b, c, d, e, a, F4, K4, M(`79`) );
412
413	a = ctx->A += a;
414	b = ctx->B += b;
415	c = ctx->C += c;
416	d = ctx->D += d;
417	e = ctx->E += e;
418	}
419	}
420
421	#if defined(HAVE_X86_SHA1_HW_SUPPORT)
422	/ HW specific version of sha1_process_bytes. /
423
424	static void sha1_hw_process_block (const void , size_t, struct* sha1_ctx *);
425
426	static void
427	sha1_hw_process_bytes (const void buffer, size_t len, struct* sha1_ctx *ctx)
428	{
429	/ When we already have some bits in our internal buffer concatenate*
430	both inputs first. /*
431	if (ctx->buflen != `0`)
432	{
433	size_t left_over = ctx->buflen;
434	size_t add = `128` - left_over > len ? len : `128` - left_over;
435
436	memcpy (dest: &((char *) ctx->buffer)[left_over], src: buffer, n: add);
437	ctx->buflen += add;
438
439	if (ctx->buflen > `64`)
440	{
441	sha1_hw_process_block (ctx->buffer, ctx->buflen & ~`63`, ctx);
442
443	ctx->buflen &= `63`;
444	/ The regions in the following copy operation cannot overlap. /
445	memcpy (dest: ctx->buffer,
446	src: &((char *) ctx->buffer)[(left_over + add) & ~`63`],
447	n: ctx->buflen);
448	}
449
450	buffer = (const char *) buffer + add;
451	len -= add;
452	}
453
454	/ Process available complete blocks. /
455	if (len >= `64`)
456	{
457	#if !_STRING_ARCH_unaligned
458	# define alignof(type) offsetof (struct { char c; type x; }, x)
459	# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
460	if (UNALIGNED_P (buffer))
461	while (len > `64`)
462	{
463	sha1_hw_process_block (memcpy (dest: ctx->buffer, src: buffer, n: `64`), `64`, ctx);
464	buffer = (const char *) buffer + `64`;
465	len -= `64`;
466	}
467	else
468	#endif
469	{
470	sha1_hw_process_block (buffer, len & ~`63`, ctx);
471	buffer = (const char *) buffer + (len & ~`63`);
472	len &= `63`;
473	}
474	}
475
476	/ Move remaining bytes in internal buffer. /
477	if (len > `0`)
478	{
479	size_t left_over = ctx->buflen;
480
481	memcpy (dest: &((char *) ctx->buffer)[left_over], src: buffer, n: len);
482	left_over += len;
483	if (left_over >= `64`)
484	{
485	sha1_hw_process_block (ctx->buffer, `64`, ctx);
486	left_over -= `64`;
487	memmove (dest: ctx->buffer, src: &ctx->buffer[`16`], n: left_over);
488	}
489	ctx->buflen = left_over;
490	}
491	}
492
493	/ Process LEN bytes of BUFFER, accumulating context into CTX.*
494	Using CPU specific intrinsics. /*
495
496	#ifdef HAVE_X86_SHA1_HW_SUPPORT
497	__attribute__((__target__ ("sse4.1,sha")))
498	#endif
499	static void
500	sha1_hw_process_block (const void buffer, size_t len, struct* sha1_ctx *ctx)
501	{
502	#ifdef HAVE_X86_SHA1_HW_SUPPORT
503	/ Implemented from*
504	https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html /*
505	const __m128i words = (const* __m128i *) buffer;
506	const __m128i endp = (const* __m128i ) ((const* char *) buffer + len);
507	__m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
508	const __m128i shuf_mask
509	= _mm_set_epi64x (q1: `0x0001020304050607ULL`, q0: `0x08090a0b0c0d0e0fULL`);
510	char check[((offsetof (struct sha1_ctx, B)
511	== offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
512	&& (offsetof (struct sha1_ctx, C)
513	== offsetof (struct sha1_ctx, A) + `2` * sizeof (ctx->A))
514	&& (offsetof (struct sha1_ctx, D)
515	== offsetof (struct sha1_ctx, A) + `3` * sizeof (ctx->A)))
516	? `1` : -`1`];
517
518	/ First increment the byte count. RFC 1321 specifies the possible*
519	length of the file up to 2^64 bits. Here we only compute the
520	number of bytes. Do a double word increment. /*
521	ctx->total[`0`] += len;
522	ctx->total[`1`] += ((len >> `31`) >> `1`) + (ctx->total[`0`] < len);
523
524	(void) &check[`0`];
525	abcd = _mm_loadu_si128 (p: (const __m128i *) &ctx->A);
526	e0 = _mm_set_epi32 (i3: ctx->E, i2: `0`, i1: `0`, i0: `0`);
527	abcd = _mm_shuffle_epi32 (abcd, `0x1b`); / 0, 1, 2, 3 /
528
529	while (words < endp)
530	{
531	abcd_save = abcd;
532	e0_save = e0;
533
534	/ 0..3 /
535	msg0 = _mm_loadu_si128 (p: words);
536	msg0 = _mm_shuffle_epi8 (a: msg0, b: shuf_mask);
537	e0 = _mm_add_epi32 (a: e0, b: msg0);
538	e1 = abcd;
539	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `0`);
540
541	/ 4..7 /
542	msg1 = _mm_loadu_si128 (p: words + `1`);
543	msg1 = _mm_shuffle_epi8 (a: msg1, b: shuf_mask);
544	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg1);
545	e0 = abcd;
546	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `0`);
547	msg0 = _mm_sha1msg1_epu32 (X: msg0, Y: msg1);
548
549	/ 8..11 /
550	msg2 = _mm_loadu_si128 (p: words + `2`);
551	msg2 = _mm_shuffle_epi8 (a: msg2, b: shuf_mask);
552	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg2);
553	e1 = abcd;
554	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `0`);
555	msg1 = _mm_sha1msg1_epu32 (X: msg1, Y: msg2);
556	msg0 = _mm_xor_si128 (a: msg0, b: msg2);
557
558	/ 12..15 /
559	msg3 = _mm_loadu_si128 (p: words + `3`);
560	msg3 = _mm_shuffle_epi8 (a: msg3, b: shuf_mask);
561	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg3);
562	e0 = abcd;
563	msg0 = _mm_sha1msg2_epu32 (X: msg0, Y: msg3);
564	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `0`);
565	msg2 = _mm_sha1msg1_epu32 (X: msg2, Y: msg3);
566	msg1 = _mm_xor_si128 (a: msg1, b: msg3);
567
568	/ 16..19 /
569	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg0);
570	e1 = abcd;
571	msg1 = _mm_sha1msg2_epu32 (X: msg1, Y: msg0);
572	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `0`);
573	msg3 = _mm_sha1msg1_epu32 (X: msg3, Y: msg0);
574	msg2 = _mm_xor_si128 (a: msg2, b: msg0);
575
576	/ 20..23 /
577	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg1);
578	e0 = abcd;
579	msg2 = _mm_sha1msg2_epu32 (X: msg2, Y: msg1);
580	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `1`);
581	msg0 = _mm_sha1msg1_epu32 (X: msg0, Y: msg1);
582	msg3 = _mm_xor_si128 (a: msg3, b: msg1);
583
584	/ 24..27 /
585	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg2);
586	e1 = abcd;
587	msg3 = _mm_sha1msg2_epu32 (X: msg3, Y: msg2);
588	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `1`);
589	msg1 = _mm_sha1msg1_epu32 (X: msg1, Y: msg2);
590	msg0 = _mm_xor_si128 (a: msg0, b: msg2);
591
592	/ 28..31 /
593	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg3);
594	e0 = abcd;
595	msg0 = _mm_sha1msg2_epu32 (X: msg0, Y: msg3);
596	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `1`);
597	msg2 = _mm_sha1msg1_epu32 (X: msg2, Y: msg3);
598	msg1 = _mm_xor_si128 (a: msg1, b: msg3);
599
600	/ 32..35 /
601	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg0);
602	e1 = abcd;
603	msg1 = _mm_sha1msg2_epu32 (X: msg1, Y: msg0);
604	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `1`);
605	msg3 = _mm_sha1msg1_epu32 (X: msg3, Y: msg0);
606	msg2 = _mm_xor_si128 (a: msg2, b: msg0);
607
608	/ 36..39 /
609	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg1);
610	e0 = abcd;
611	msg2 = _mm_sha1msg2_epu32 (X: msg2, Y: msg1);
612	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `1`);
613	msg0 = _mm_sha1msg1_epu32 (X: msg0, Y: msg1);
614	msg3 = _mm_xor_si128 (a: msg3, b: msg1);
615
616	/ 40..43 /
617	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg2);
618	e1 = abcd;
619	msg3 = _mm_sha1msg2_epu32 (X: msg3, Y: msg2);
620	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `2`);
621	msg1 = _mm_sha1msg1_epu32 (X: msg1, Y: msg2);
622	msg0 = _mm_xor_si128 (a: msg0, b: msg2);
623
624	/ 44..47 /
625	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg3);
626	e0 = abcd;
627	msg0 = _mm_sha1msg2_epu32 (X: msg0, Y: msg3);
628	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `2`);
629	msg2 = _mm_sha1msg1_epu32 (X: msg2, Y: msg3);
630	msg1 = _mm_xor_si128 (a: msg1, b: msg3);
631
632	/ 48..51 /
633	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg0);
634	e1 = abcd;
635	msg1 = _mm_sha1msg2_epu32 (X: msg1, Y: msg0);
636	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `2`);
637	msg3 = _mm_sha1msg1_epu32 (X: msg3, Y: msg0);
638	msg2 = _mm_xor_si128 (a: msg2, b: msg0);
639
640	/ 52..55 /
641	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg1);
642	e0 = abcd;
643	msg2 = _mm_sha1msg2_epu32 (X: msg2, Y: msg1);
644	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `2`);
645	msg0 = _mm_sha1msg1_epu32 (X: msg0, Y: msg1);
646	msg3 = _mm_xor_si128 (a: msg3, b: msg1);
647
648	/ 56..59 /
649	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg2);
650	e1 = abcd;
651	msg3 = _mm_sha1msg2_epu32 (X: msg3, Y: msg2);
652	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `2`);
653	msg1 = _mm_sha1msg1_epu32 (X: msg1, Y: msg2);
654	msg0 = _mm_xor_si128 (a: msg0, b: msg2);
655
656	/ 60..63 /
657	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg3);
658	e0 = abcd;
659	msg0 = _mm_sha1msg2_epu32 (X: msg0, Y: msg3);
660	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `3`);
661	msg2 = _mm_sha1msg1_epu32 (X: msg2, Y: msg3);
662	msg1 = _mm_xor_si128 (a: msg1, b: msg3);
663
664	/ 64..67 /
665	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg0);
666	e1 = abcd;
667	msg1 = _mm_sha1msg2_epu32 (X: msg1, Y: msg0);
668	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `3`);
669	msg3 = _mm_sha1msg1_epu32 (X: msg3, Y: msg0);
670	msg2 = _mm_xor_si128 (a: msg2, b: msg0);
671
672	/ 68..71 /
673	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg1);
674	e0 = abcd;
675	msg2 = _mm_sha1msg2_epu32 (X: msg2, Y: msg1);
676	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `3`);
677	msg3 = _mm_xor_si128 (a: msg3, b: msg1);
678
679	/ 72..75 /
680	e0 = _mm_sha1nexte_epu32 (X: e0, Y: msg2);
681	e1 = abcd;
682	msg3 = _mm_sha1msg2_epu32 (X: msg3, Y: msg2);
683	abcd = _mm_sha1rnds4_epu32 (abcd, e0, `3`);
684
685	/ 76..79 /
686	e1 = _mm_sha1nexte_epu32 (X: e1, Y: msg3);
687	e0 = abcd;
688	abcd = _mm_sha1rnds4_epu32 (abcd, e1, `3`);
689
690	/ Finalize. /
691	e0 = _mm_sha1nexte_epu32 (X: e0, Y: e0_save);
692	abcd = _mm_add_epi32 (a: abcd, b: abcd_save);
693
694	words = words + `4`;
695	}
696
697	abcd = _mm_shuffle_epi32 (abcd, `0x1b`); / 0, 1, 2, 3 /
698	_mm_storeu_si128 (p: (__m128i *) &ctx->A, b: abcd);
699	ctx->E = _mm_extract_epi32 (e0, `3`);
700	#endif
701	}
702	#endif
703
704	/ Return sha1_process_bytes or some hardware optimized version thereof*
705	depending on current CPU. /*
706
707	sha1_process_bytes_fn
708	sha1_choose_process_bytes (void)
709	{
710	#ifdef HAVE_X86_SHA1_HW_SUPPORT
711	unsigned int eax, ebx, ecx, edx;
712	if (__get_cpuid_count (leaf: `7`, subleaf: `0`, eax: &eax, ebx: &ebx, ecx: &ecx, edx: &edx)
713	&& (ebx & bit_SHA) != `0`
714	&& __get_cpuid (leaf: `1`, eax: &eax, ebx: &ebx, ecx: &ecx, edx: &edx)
715	&& (ecx & bit_SSE4_1) != `0`)
716	return sha1_hw_process_bytes;
717	#endif
718	return sha1_process_bytes;
719	}
720

source code of libiberty/sha1.c