1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Checksum functions for Hexagon |
4 | * |
5 | * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. |
6 | */ |
7 | |
8 | /* This was derived from arch/alpha/lib/checksum.c */ |
9 | |
10 | |
11 | #include <linux/module.h> |
12 | #include <linux/string.h> |
13 | |
14 | #include <asm/byteorder.h> |
15 | #include <net/checksum.h> |
16 | #include <linux/uaccess.h> |
17 | #include <asm/intrinsics.h> |
18 | |
19 | |
/*
 * Vector value operations
 *
 * A 64-bit value is treated as four 16-bit lanes at bit offsets 48, 32,
 * 16 and 0.  SIGN/CARRY/SELECT scale a per-lane constant (0x8000, 0x0002
 * or 0x0001) by x and move it to bit offset y; the VR_* macros combine
 * four such lanes (a = highest lane, d = lowest lane) into one 64-bit
 * constant.  Every use in this file passes 0 or 1 for a..d.
 *
 * All macro arguments are parenthesized so that expression arguments
 * expand with the intended precedence.
 */
#define SIGN(x, y)	((0x8000ULL * (x)) << (y))
#define CARRY(x, y)	((0x0002ULL * (x)) << (y))
#define SELECT(x, y)	((0x0001ULL * (x)) << (y))

#define VR_NEGATE(a, b, c, d)	(SIGN((a), 48) + SIGN((b), 32) + \
				 SIGN((c), 16) + SIGN((d), 0))
#define VR_CARRY(a, b, c, d)	(CARRY((a), 48) + CARRY((b), 32) + \
				 CARRY((c), 16) + CARRY((d), 0))
#define VR_SELECT(a, b, c, d)	(SELECT((a), 48) + SELECT((b), 32) + \
				 SELECT((c), 16) + SELECT((d), 0))
31 | |
32 | |
33 | /* optimized HEXAGON V3 intrinsic version */ |
34 | static inline unsigned short from64to16(u64 x) |
35 | { |
36 | u64 sum; |
37 | |
38 | sum = HEXAGON_P_vrmpyh_PP(x^VR_NEGATE(1, 1, 1, 1), |
39 | VR_SELECT(1, 1, 1, 1)); |
40 | sum += VR_CARRY(0, 0, 1, 0); |
41 | sum = HEXAGON_P_vrmpyh_PP(sum, VR_SELECT(0, 0, 1, 1)); |
42 | |
43 | return 0xFFFF & sum; |
44 | } |
45 | |
46 | /* |
47 | * computes the checksum of the TCP/UDP pseudo-header |
48 | * returns a 16-bit checksum, already complemented. |
49 | */ |
50 | __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, |
51 | __u32 len, __u8 proto, __wsum sum) |
52 | { |
53 | return (__force __sum16)~from64to16( |
54 | x: (__force u64)saddr + (__force u64)daddr + |
55 | (__force u64)sum + ((len + proto) << 8)); |
56 | } |
57 | |
58 | __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, |
59 | __u32 len, __u8 proto, __wsum sum) |
60 | { |
61 | u64 result; |
62 | |
63 | result = (__force u64)saddr + (__force u64)daddr + |
64 | (__force u64)sum + ((len + proto) << 8); |
65 | |
66 | /* Fold down to 32-bits so we don't lose in the typedef-less |
67 | network stack. */ |
68 | /* 64 to 33 */ |
69 | result = (result & 0xffffffffUL) + (result >> 32); |
70 | /* 33 to 32 */ |
71 | result = (result & 0xffffffffUL) + (result >> 32); |
72 | return (__force __wsum)result; |
73 | } |
74 | EXPORT_SYMBOL(csum_tcpudp_nofold); |
75 | |
76 | /* |
77 | * Do a 64-bit checksum on an arbitrary memory area.. |
78 | * |
79 | * This isn't a great routine, but it's not _horrible_ either. The |
80 | * inner loop could be unrolled a bit further, and there are better |
81 | * ways to do the carry, but this is reasonable. |
82 | */ |
83 | |
84 | /* optimized HEXAGON intrinsic version, with over read fixed */ |
85 | unsigned int do_csum(const void *voidptr, int len) |
86 | { |
87 | u64 sum0, sum1, x0, x1, *ptr8_o, *ptr8_e, *ptr8; |
88 | int i, start, mid, end, mask; |
89 | const char *ptr = voidptr; |
90 | unsigned short *ptr2; |
91 | unsigned int *ptr4; |
92 | |
93 | if (len <= 0) |
94 | return 0; |
95 | |
96 | start = 0xF & (16-(((int) ptr) & 0xF)) ; |
97 | mask = 0x7fffffffUL >> HEXAGON_R_cl0_R(len); |
98 | start = start & mask ; |
99 | |
100 | mid = len - start; |
101 | end = mid & 0xF; |
102 | mid = mid>>4; |
103 | sum0 = mid << 18; |
104 | sum1 = 0; |
105 | |
106 | if (start & 1) |
107 | sum0 += (u64) (ptr[0] << 8); |
108 | ptr2 = (unsigned short *) &ptr[start & 1]; |
109 | if (start & 2) |
110 | sum1 += (u64) ptr2[0]; |
111 | ptr4 = (unsigned int *) &ptr[start & 3]; |
112 | if (start & 4) { |
113 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, |
114 | VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), |
115 | VR_SELECT(0, 0, 1, 1)); |
116 | sum0 += VR_SELECT(0, 0, 1, 0); |
117 | } |
118 | ptr8 = (u64 *) &ptr[start & 7]; |
119 | if (start & 8) { |
120 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, |
121 | VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), |
122 | VR_SELECT(1, 1, 1, 1)); |
123 | sum1 += VR_CARRY(0, 0, 1, 0); |
124 | } |
125 | ptr8_o = (u64 *) (ptr + start); |
126 | ptr8_e = (u64 *) (ptr + start + 8); |
127 | |
128 | if (mid) { |
129 | x0 = *ptr8_e; ptr8_e += 2; |
130 | x1 = *ptr8_o; ptr8_o += 2; |
131 | if (mid > 1) |
132 | for (i = 0; i < mid-1; i++) { |
133 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, |
134 | x0^VR_NEGATE(1, 1, 1, 1), |
135 | VR_SELECT(1, 1, 1, 1)); |
136 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, |
137 | x1^VR_NEGATE(1, 1, 1, 1), |
138 | VR_SELECT(1, 1, 1, 1)); |
139 | x0 = *ptr8_e; ptr8_e += 2; |
140 | x1 = *ptr8_o; ptr8_o += 2; |
141 | } |
142 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, x0^VR_NEGATE(1, 1, 1, 1), |
143 | VR_SELECT(1, 1, 1, 1)); |
144 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, x1^VR_NEGATE(1, 1, 1, 1), |
145 | VR_SELECT(1, 1, 1, 1)); |
146 | } |
147 | |
148 | ptr4 = (unsigned int *) &ptr[start + (mid * 16) + (end & 8)]; |
149 | if (end & 4) { |
150 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, |
151 | VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), |
152 | VR_SELECT(0, 0, 1, 1)); |
153 | sum1 += VR_SELECT(0, 0, 1, 0); |
154 | } |
155 | ptr2 = (unsigned short *) &ptr[start + (mid * 16) + (end & 12)]; |
156 | if (end & 2) |
157 | sum0 += (u64) ptr2[0]; |
158 | |
159 | if (end & 1) |
160 | sum1 += (u64) ptr[start + (mid * 16) + (end & 14)]; |
161 | |
162 | ptr8 = (u64 *) &ptr[start + (mid * 16)]; |
163 | if (end & 8) { |
164 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, |
165 | VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), |
166 | VR_SELECT(1, 1, 1, 1)); |
167 | sum0 += VR_CARRY(0, 0, 1, 0); |
168 | } |
169 | sum0 = HEXAGON_P_vrmpyh_PP((sum0+sum1)^VR_NEGATE(0, 0, 0, 1), |
170 | VR_SELECT(0, 0, 1, 1)); |
171 | sum0 += VR_NEGATE(0, 0, 0, 1); |
172 | sum0 = HEXAGON_P_vrmpyh_PP(sum0, VR_SELECT(0, 0, 1, 1)); |
173 | |
174 | if (start & 1) |
175 | sum0 = (sum0 << 8) | (0xFF & (sum0 >> 8)); |
176 | |
177 | return 0xFFFF & sum0; |
178 | } |
179 | |