csum-partial_64.c source code [linux/arch/x86/lib/csum-partial_64.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* arch/x86_64/lib/csum-partial.c
4	*
5	* This file contains network checksum routines that are better done
6	* in an architecture-specific manner due to speed.
7	*/
8
9	#include <linux/compiler.h>
10	#include <linux/export.h>
11	#include <asm/checksum.h>
12	#include <asm/word-at-a-time.h>
13
/*
 * Fold a 32-bit value into 16 bits using ones' complement (end-around
 * carry) addition of its upper and lower halves.
 *
 * @a: 32-bit partial sum to fold.
 *
 * Returns the 16-bit folded sum.
 */
static inline unsigned short from32to16(unsigned a)
{
	/* high half + low half; the result needs at most 17 bits */
	unsigned int folded = (a >> 16) + (a & 0xffff);

	/*
	 * Wrap the possible carry out of bit 15 back into bit 0.  The first
	 * sum is at most 0x1fffe, so this second add cannot carry again.
	 */
	folded = (folded & 0xffff) + (folded >> 16);
	return (unsigned short)folded;
}
23
24	static inline __wsum csum_tail(u64 temp64, int odd)
25	{
26	unsigned int result;
27
28	result = add32_with_carry(a: temp64 >> `32`, b: temp64 & `0xffffffff`);
29	if (unlikely(odd)) {
30	result = from32to16(a: result);
31	result = ((result >> `8`) & `0xff`) \| ((result & `0xff`) << `8`);
32	}
33	return (__force __wsum)result;
34	}
35
36	/*
37	* Do a checksum on an arbitrary memory area.
38	* Returns a 32bit checksum.
39	*
40	* This isn't as time critical as it used to be because many NICs
41	* do hardware checksumming these days.
42	*
43	* Still, with CHECKSUM_COMPLETE this is called to compute
44	* checksums on IPv6 headers (40 bytes) and other small parts.
45	* it's best to have buff aligned on a 64-bit boundary
46	*/
47	__wsum csum_partial(const void buff, int* len, __wsum sum)
48	{
49	u64 temp64 = (__force u64)sum;
50	unsigned odd;
51
52	odd = `1` & (unsigned long) buff;
53	if (unlikely(odd)) {
54	if (unlikely(len == `0`))
55	return sum;
56	temp64 = ror32(word: (__force u32)sum, shift: `8`);
57	temp64 += ((unsigned* char *)buff << `8`);
58	len--;
59	buff++;
60	}
61
62	/*
63	* len == 40 is the hot case due to IPv6 headers, but annotating it likely()
64	* has noticeable negative affect on codegen for all other cases with
65	* minimal performance benefit here.
66	*/
67	if (len == `40`) {
68	asm("addq 0*8(%[src]),%[res]\n\t"
69	"adcq 1*8(%[src]),%[res]\n\t"
70	"adcq 2*8(%[src]),%[res]\n\t"
71	"adcq 3*8(%[src]),%[res]\n\t"
72	"adcq 4*8(%[src]),%[res]\n\t"
73	"adcq $0,%[res]"
74	: [res] "+r"(temp64)
75	: [src] "r"(buff), "m"((const* char(*)[`40`])buff));
76	return csum_tail(temp64, odd);
77	}
78	if (unlikely(len >= `64`)) {
79	/*
80	* Extra accumulators for better ILP in the loop.
81	*/
82	u64 tmp_accum, tmp_carries;
83
84	asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t"
85	"xorl %k[tmp_carries],%k[tmp_carries]\n\t"
86	"subl $64, %[len]\n\t"
87	"1:\n\t"
88	"addq 0*8(%[src]),%[res]\n\t"
89	"adcq 1*8(%[src]),%[res]\n\t"
90	"adcq 2*8(%[src]),%[res]\n\t"
91	"adcq 3*8(%[src]),%[res]\n\t"
92	"adcl $0,%k[tmp_carries]\n\t"
93	"addq 4*8(%[src]),%[tmp_accum]\n\t"
94	"adcq 5*8(%[src]),%[tmp_accum]\n\t"
95	"adcq 6*8(%[src]),%[tmp_accum]\n\t"
96	"adcq 7*8(%[src]),%[tmp_accum]\n\t"
97	"adcl $0,%k[tmp_carries]\n\t"
98	"addq $64, %[src]\n\t"
99	"subl $64, %[len]\n\t"
100	"jge 1b\n\t"
101	"addq %[tmp_accum],%[res]\n\t"
102	"adcq %[tmp_carries],%[res]\n\t"
103	"adcq $0,%[res]"
104	: [tmp_accum] "=&r"(tmp_accum),
105	[tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64),
106	[len] "+r"(len), [src] "+r"(buff)
107	: "m"((const* char *)buff));
108	}
109
110	if (len & `32`) {
111	asm("addq 0*8(%[src]),%[res]\n\t"
112	"adcq 1*8(%[src]),%[res]\n\t"
113	"adcq 2*8(%[src]),%[res]\n\t"
114	"adcq 3*8(%[src]),%[res]\n\t"
115	"adcq $0,%[res]"
116	: [res] "+r"(temp64)
117	: [src] "r"(buff), "m"((const* char(*)[`32`])buff));
118	buff += `32`;
119	}
120	if (len & `16`) {
121	asm("addq 0*8(%[src]),%[res]\n\t"
122	"adcq 1*8(%[src]),%[res]\n\t"
123	"adcq $0,%[res]"
124	: [res] "+r"(temp64)
125	: [src] "r"(buff), "m"((const* char(*)[`16`])buff));
126	buff += `16`;
127	}
128	if (len & `8`) {
129	asm("addq 0*8(%[src]),%[res]\n\t"
130	"adcq $0,%[res]"
131	: [res] "+r"(temp64)
132	: [src] "r"(buff), "m"((const* char(*)[`8`])buff));
133	buff += `8`;
134	}
135	if (len & `7`) {
136	unsigned int shift = (-len << `3`) & `63`;
137	unsigned long trail;
138
139	trail = (load_unaligned_zeropad(addr: buff) << shift) >> shift;
140
141	asm("addq %[trail],%[res]\n\t"
142	"adcq $0,%[res]"
143	: [res] "+r"(temp64)
144	: [trail] "r"(trail));
145	}
146	return csum_tail(temp64, odd);
147	}
148	EXPORT_SYMBOL(csum_partial);
149
150	/*
151	* this routine is used for miscellaneous IP-like checksums, mainly
152	* in icmp.c
153	*/
154	__sum16 ip_compute_csum(const void buff, int* len)
155	{
156	return csum_fold(sum: csum_partial(buff, len, `0`));
157	}
158	EXPORT_SYMBOL(ip_compute_csum);
159

source code of linux/arch/x86/lib/csum-partial_64.c