1 | /* SPDX-License-Identifier: GPL-2.0+ |
2 | * |
3 | * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ |
4 | * |
5 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
6 | * operating system. INET is implemented using the BSD Socket |
7 | * interface as the means of communication with the user level. |
8 | * |
9 | * IP/TCP/UDP checksumming routines |
10 | * |
11 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> |
12 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
13 | * Tom May, <ftom@netcom.com> |
14 | * Pentium Pro/II routines: |
15 | * Alexander Kjeldaas <astor@guardian.no> |
16 | * Finn Arne Gangstad <finnag@guardian.no> |
17 | * Lots of code moved from tcp.c and ip.c; see those files |
18 | * for more names. |
19 | * |
20 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception |
21 | * handling. |
22 | * Andi Kleen, add zeroing on error |
23 | * converted to pure assembler |
24 | * |
25 | * SuperH version: Copyright (C) 1999 Niibe Yutaka |
26 | */ |
27 | |
28 | #include <asm/errno.h> |
29 | #include <linux/linkage.h> |
30 | |
/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * Computes a partial checksum, e.g. for TCP/UDP fragments: the bytes of
 * buf are added to sum as a 32-bit accumulation with end-around carry.
 *
 * In:	r4 = buf, r5 = len, r6 = sum
 * Out:	r0 = updated sum
 * Scratch: r0-r3, r7 (copy of the original buf), T bit.
 *	r4 is advanced past the data and r5/r6 are modified.
 *
 * When buf starts on an odd address the leading byte is added first and
 * the accumulator is rotated left by 8 bits so that the rest of the
 * buffer can be summed with aligned accesses; a second 8-bit rotate
 * before returning completes the realignment.  A zero-length request
 * returns sum untouched, whatever the alignment of buf.
 */

	.text
ENTRY(csum_partial)
	/*
	 * The unrolled loop below wants 4-byte alignment; 1- and 2-byte
	 * misalignment is handled up front.  (The original remark that
	 * buffers are normally 2- or 4-byte aligned comes from experiments
	 * with Ethernet and SLIP connections.)
	 */
	mov	r4, r0
	tst	#3, r0			! T = buf is 4-byte aligned
	bt/s	.Lcp_aligned4
	 mov	r4, r7			! (delay slot) keep original buf for the final alignment check
	!
	tst	#1, r0
	bt	.Lcp_aligned2		! even address: only the 2-byte fixup is needed

	! buf is odd
	tst	r5, r5			! T = (len == 0)
	add	#-1, r5			! add does not touch T
	bt	.Lcp_out		! len == 0: return sum as is; the realign
					! rotate must not run because the matching
					! rotate below has not been applied
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6			! T = 0 here (len != 0)
	mov	#0, r0
	addc	r0, r6			! fold the carry of the byte add back in;
					! the rotate below does not use T
	mov	r6, r0			! r6 = r6 rotated left by 8 bits
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	.Lcp_aligned4		! already 4-byte aligned after the odd byte
.Lcp_aligned2:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5			! Alignment uses up two bytes.
	cmp/pz	r5			! T = (r5 >= 0)
	bt/s	.Lcp_head_word		! Jump if we had at least two bytes.
	 clrt
	bra	.Lcp_tail
	 add	#2, r5			! r5 was < 2.  Deal with it.
.Lcp_head_word:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	.Lcp_aligned4
	add	#1, r6			! end-around carry
.Lcp_aligned4:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1			! r1 = r5 >> 5: number of 32-byte chunks
	tst	r1, r1
	bt/s	.Lcp_words		! no full chunk
	 clrt
	.align	2
.Lcp_loop32:
	! Eight longwords per iteration; the carry is chained through T.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0			! save the carry: dt overwrites T
	dt	r1
	bf/s	.Lcp_loop32
	 cmp/eq	#1, r0			! (delay slot) T = saved carry
	! here, we know r1==0
	addc	r1, r6			! add carry to r6
.Lcp_words:
	mov	r5, r0
	and	#0x1c, r0		! bytes left in whole longwords (0..28)
	tst	r0, r0
	bt	.Lcp_tail
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1			! r1 = longword count
	mov	#0, r2
.Lcp_loop4:
	! Each pass adds the previously loaded longword and loads the next.
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	.Lcp_loop4
	 cmp/eq	#1, r0			! (delay slot) T = saved carry
	addc	r2, r6			! last longword
	addc	r1, r6			! r1==0 here, so it means add carry-bit
.Lcp_tail:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	.Lcp_realign		! nothing left
	mov	#2, r1
	cmp/hs	r1, r5			! T = (r5 >= 2)
	bf	.Lcp_tail_byte		! exactly one byte (T = 0)
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	.Lcp_tail_add		! exactly two bytes
	 clrt
	shll16	r0			! three bytes: halfword goes in the upper half
	addc	r0, r6
.Lcp_tail_byte:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
.Lcp_tail_add:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6			! end-around carry
.Lcp_realign:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	.Lcp_out
	mov	r6, r0			! r6 = r6 rotated left by 8 bits
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
.Lcp_out:
	rts
	 mov	r6, r0			! (delay slot) return value
174 | |
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
 *
 * Copy len bytes from src to dst while checksumming them; otherwise like
 * csum_partial with the initial sum being ~0U.
 */

/*
 * EXC(insn) emits insn at local label 9999 and adds the address pair
 * (9999b, 6001f) to the __ex_table section.  6001 is the stub in .fixup
 * at the end of this function, which returns 0.
 */
#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

!
! In:
!   r4:	const char *SRC
!   r5:	char *DST
!   r6:	int LEN
! Working registers:
!   r7:	running sum (starts at ~0U), returned in r0
!   r2:	saved byte count, used to derive the word and tail counts
!   r0, r1: scratch
!
ENTRY(csum_partial_copy_generic)
	mov	#-1,r7			! sum = ~0U
	mov	#3,r0			! Compare the low two address bits of src and dst
	mov	r4,r1
	and	r0,r1			! r1 = src & 3
	and	r5,r0			! r0 = dst & 3
	cmp/eq	r1,r0
	bf	.Lcc_bytewise		! Alignments differ: take the slow path
	tst	#1,r0			! Same alignment, but is it odd?
	bf	.Lcc_bytewise		! Odd addresses also take the slow path

	mov	#2,r0
	tst	r0,r5			! dst (and therefore src) 4-byte aligned?
	bt	.Lcc_aligned4
	add	#-2,r6			! The alignment halfword uses up two bytes.
	cmp/pz	r6			! T = (r6 >= 0): at least two bytes available
	bt/s	.Lcc_head_word
	 clrt
	add	#2,r6			! Fewer than two bytes: undo and go to the tail.
	bra	.Lcc_tail
	 mov	r6,r2

.Lcc_bytewise:
	! Different src and dest alignments (or both odd).
	! This is not common, so a loop over byte pairs will do.
	mov	r6,r2
	shlr	r6			! r6 = number of byte pairs
	tst	r6,r6
	bt	.Lcc_tail		! no complete pair
	clrt
	.align	2
.Lcc_pair_loop:
EXC(	mov.b	@r4+,r1 	)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Combine the two bytes into one 16-bit value in memory order.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0			! save the carry: dt overwrites T
	dt	r6
	bf/s	.Lcc_pair_loop
	 cmp/eq	#1,r0			! (delay slot) T = saved carry
	mov	#0,r0
	addc	r0, r7			! end-around carry

	mov	r2, r0
	tst	#1, r0			! odd byte count?
	bt	.Lcc_done
	bra	.Lcc_tail_byte		! copy and add the final byte
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
.Lcc_head_word:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7			! end-around carry
.Lcc_aligned4:
	mov	r6,r2			! r2 = bytes left from here on
	mov	#-5,r0
	shld	r0,r6			! r6 = number of 32-byte chunks
	tst	r6,r6
	bt/s	.Lcc_words		! no full chunk
	 clrt
	.align	2
.Lcc_loop32:
	! 32 bytes per iteration, two longwords at a time.
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0			! save the carry: dt overwrites T
	dt	r6
	bf/s	.Lcc_loop32
	 cmp/eq	#1,r0			! (delay slot) T = saved carry
	mov	#0,r0
	addc	r0,r7			! end-around carry

.Lcc_words:
	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6			! bytes left in whole longwords (0..28)
	cmp/pl	r6
	bf/s	.Lcc_tail		! none
	 clrt
	shlr2	r6			! r6 = longword count
.Lcc_loop4:
EXC(	mov.l	@r4+,r0	)
	addc	r0,r7
EXC(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	.Lcc_loop4
	 cmp/eq	#1,r0			! (delay slot) T = saved carry
	mov	#0,r0
	addc	r0,r7			! end-around carry
.Lcc_tail:
	! 3 bytes or less remaining
	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	.Lcc_done		! nothing left
	mov	#2,r1
	cmp/hs	r1,r6			! T = (r6 >= 2)
	bf	.Lcc_tail_byte		! exactly one byte (T = 0)
EXC(	mov.w	@r4+,r0	)
EXC(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	.Lcc_tail_add		! exactly two bytes
	 clrt
	shll16	r0			! three bytes: halfword goes in the upper half
	addc	r0,r7
.Lcc_tail_byte:
EXC(	mov.b	@r4+,r0	)
EXC(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
.Lcc_tail_add:
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7			! end-around carry
.Lcc_done:
	rts
	 mov	r7,r0			! (delay slot) return the accumulated sum

	! Exception handler: target of every EXC() table entry above.
	.section .fixup, "ax"

6001:
	rts
	 mov	#0,r0			! (delay slot) return 0
	.previous
366 | |