/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc V9 optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996, 2000 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <linux/export.h>
	.text

csum_partial_fix_alignment:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
	be,pt		%icc, 1f
	 nop
	ldub		[%o0 + 0x00], %o4
	add		%o0, 1, %o0
	sub		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, csum_partial_post_align
	 cmp		%o1, 2
	blu,pn		%icc, csum_partial_end_cruft
	 nop
	lduh		[%o0 + 0x00], %o5
	add		%o0, 2, %o0
	sub		%o1, 2, %o1
	ba,pt		%xcc, csum_partial_post_align
	 add		%o5, %o4, %o4
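
	/* Note: the be,pt at the top of this helper tests the condition
	 * codes set by the "andcc %o0, 0x1, %g7" in csum_partial's
	 * delay slot.  Rough C sketch of the fix-up (illustrative only,
	 * not part of the build; buf/len/acc stand in for %o0/%o1/%o4):
	 *
	 *	if (buf & 1) { acc  = *buf++; len--; }	// leading odd byte
	 *	if (buf & 2) {				// leading halfword
	 *		if (len < 2)
	 *			goto csum_partial_end_cruft;
	 *		acc += *(u16 *)buf; buf += 2; len -= 2;
	 *	}
	 */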

	.align		32
	.globl		csum_partial
	.type		csum_partial,#function
	EXPORT_SYMBOL(csum_partial)
csum_partial:	/* %o0=buff, %o1=len, %o2=sum */
	prefetch	[%o0 + 0x000], #n_reads
	clr		%o4
	prefetch	[%o0 + 0x040], #n_reads
	brz,pn		%o1, csum_partial_finish
	 andcc		%o0, 0x3, %g0

	/* We remember in %g7 whether the lowest bit of the address
	 * was set, because if it was we have to swap the upper and
	 * lower 8-bit halves of the sum we calculate.
	 */
	bne,pn		%icc, csum_partial_fix_alignment
	 andcc		%o0, 0x1, %g7
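
	/* Starting one byte early rotates every big-endian 16-bit
	 * word of the buffer by 8 bits; the byte swap near the end
	 * of this routine undoes that rotation.
	 */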

csum_partial_post_align:
	prefetch	[%o0 + 0x080], #n_reads
	andncc		%o1, 0x3f, %o3

	prefetch	[%o0 + 0x0c0], #n_reads
	sub		%o1, %o3, %o1
	brz,pn		%o3, 2f
	 prefetch	[%o0 + 0x100], #n_reads

	/* So that we don't need to use the non-pairing
	 * add-with-carry instructions we accumulate 32-bit
	 * values into a 64-bit register.  At the end of the
	 * loop we fold it down to 32 bits, then to 16.
	 */
	prefetch	[%o0 + 0x140], #n_reads
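
	/* Rough C equivalent of the unrolled loop below (illustrative
	 * only; buf is %o0, acc the 64-bit accumulator %o4, and
	 * nblocks is %o3 / 0x40):
	 *
	 *	while (nblocks--) {
	 *		for (int i = 0; i < 16; i++)
	 *			acc += ((u32 *)buf)[i];	// carries pile up in bits 32+
	 *		buf += 0x40;
	 *	}
	 */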
1:	lduw		[%o0 + 0x00], %o5
	lduw		[%o0 + 0x04], %g1
	lduw		[%o0 + 0x08], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x0c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x10], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x14], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x18], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x1c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x20], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x24], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x28], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x2c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x30], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x34], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x38], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x3c], %g3
	add		%o4, %g1, %o4
	prefetch	[%o0 + 0x180], #n_reads
	add		%o4, %g2, %o4
	subcc		%o3, 0x40, %o3
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 add		%o4, %g3, %o4

2:	and		%o1, 0x3c, %o3
	brz,pn		%o3, 2f
	 sub		%o1, %o3, %o1
1:	lduw		[%o0 + 0x00], %o5
	subcc		%o3, 0x4, %o3
	add		%o0, 0x4, %o0
	bne,pt		%icc, 1b
	 add		%o4, %o5, %o4

2:
	/* fold 64-->32 */
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
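
	/* I.e. acc = (acc >> 32) + (u32)acc, performed twice because
	 * the first add can itself carry into bit 32.
	 */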

	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
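
	/* I.e. acc = (acc >> 16) + (acc & 0xffff), twice: one pass
	 * leaves at most 0x1fffe, so a second pass is enough to bring
	 * the sum back into 16 bits.
	 */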

csum_partial_end_cruft:
	/* %o4 has the 16-bit sum we have calculated so far. */
	cmp		%o1, 2
	blu,pt		%icc, 1f
	 nop
	lduh		[%o0 + 0x00], %o5
	sub		%o1, 2, %o1
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
1:	brz,pt		%o1, 1f
	 nop
	ldub		[%o0 + 0x00], %o5
	sub		%o1, 1, %o1
	add		%o0, 1, %o0
	sllx		%o5, 8, %o5
	add		%o4, %o5, %o4
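
	/* The final odd byte is shifted into bits 15:8 above because
	 * it is the high (first) byte of a big-endian 16-bit word
	 * whose missing low byte is implicitly zero.
	 */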
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		%g7, 1f
	 nop

	/* We started with an odd byte, byte-swap the result. */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4
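
	/* I.e. sum = ((sum & 0xff) << 8) | (sum >> 8).  The 16-bit
	 * ones-complement sum commutes with byte rotation, so this
	 * single swap corrects for the odd starting address.
	 */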

1:	addcc		%o2, %o4, %o2
	addc		%g0, %o2, %o2
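
	/* End-around carry: if adding the folded 16-bit sum into the
	 * incoming 32-bit sum carried out of bit 31, fold the carry
	 * back in at bit 0, as ones-complement arithmetic requires.
	 * The delay slot of the retl below zero-extends %o2 into the
	 * 32-bit return value.
	 */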

csum_partial_finish:
	retl
	 srl		%o2, 0, %o0