// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
 *
 * Copyright 2023 WANG Xuerui <git@xen0n.name>
 *
 * Based on the generic RAID-6 code (int.uc):
 *
 * Copyright 2002-2004 H. Peter Anvin
 */

#include <linux/raid/pq.h>
#include "loongarch.h"

/*
 * The vector algorithms are currently priority 0, which means the generic
 * scalar algorithms are not being disabled if vector support is present.
 * This mirrors the similar LoongArch RAID5 XOR code; the main reason is
 * repeated here: it cannot be ruled out at this point in time that some
 * future (maybe reduced) models could run the vector algorithms slower than
 * the scalar ones, maybe for errata or micro-op reasons. It may be
 * appropriate to revisit this after one or two more uarch generations.
 */
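
/*
 * All of the syndrome loops below implement the usual RAID-6 computation:
 * P is a plain XOR across the data disks, while Q accumulates the data with
 * the running sum multiplied by 2 in GF(2^8) (polynomial 0x11d) at each
 * step. The MASK (vslti.b/xvslti.b), SHLBYTE (vslli.b/xvslli.b) and 0x1d
 * (vandi.b/xvandi.b) sequences together form that per-byte multiply by 2.
 * As an illustrative scalar sketch (not part of this file), one byte would
 * be handled as:
 *
 *	static inline u8 gf256_mul2(u8 v)
 *	{
 *		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
 *	}
 *
 * The vector form computes the 0x1d correction for all lanes at once:
 * vslti.b yields 0xff for bytes whose top bit is set, vandi.b turns that
 * into 0x1d or 0x00, and the result is XORed into the shifted value.
 */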

#ifdef CONFIG_CPU_HAS_LSX
#define NSIZE 16

static int raid6_has_lsx(void)
{
	return cpu_has_lsx;
}

static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

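	/*
	 * kernel_fpu_begin()/kernel_fpu_end() bracket all vector register
	 * usage: kernel code may not touch the FP/SIMD state outside such a
	 * region, since that state is not saved/restored on kernel entry.
	 * The same bracketing is used in every routine in this file.
	 */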
	kernel_fpu_begin();

	/*
	 * $vr0, $vr1, $vr2, $vr3: wp
	 * $vr4, $vr5, $vr6, $vr7: wq
	 * $vr8, $vr9, $vr10, $vr11: wd
	 * $vr12, $vr13, $vr14, $vr15: w2
	 * $vr16, $vr17, $vr18, $vr19: w1
	 */
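	/*
	 * The C-style comments below follow the int.uc template this code was
	 * unrolled from (see the "Based on" note at the top); $$ stands for
	 * the unroll index (0..3 here), so each outer iteration consumes
	 * 4 * NSIZE = 64 bytes of every disk.
	 */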
	for (d = 0; d < bytes; d += NSIZE*4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("vld $vr0, %0" : : "m" (dptr[z0][d+0*NSIZE]));
		asm volatile("vld $vr1, %0" : : "m" (dptr[z0][d+1*NSIZE]));
		asm volatile("vld $vr2, %0" : : "m" (dptr[z0][d+2*NSIZE]));
		asm volatile("vld $vr3, %0" : : "m" (dptr[z0][d+3*NSIZE]));
		asm volatile("vori.b $vr4, $vr0, 0");
		asm volatile("vori.b $vr5, $vr1, 0");
		asm volatile("vori.b $vr6, $vr2, 0");
		asm volatile("vori.b $vr7, $vr3, 0");
		for (z = z0-1; z >= 0; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("vld $vr8, %0" : : "m" (dptr[z][d+0*NSIZE]));
			asm volatile("vld $vr9, %0" : : "m" (dptr[z][d+1*NSIZE]));
			asm volatile("vld $vr10, %0" : : "m" (dptr[z][d+2*NSIZE]));
			asm volatile("vld $vr11, %0" : : "m" (dptr[z][d+3*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("vxor.v $vr0, $vr0, $vr8");
			asm volatile("vxor.v $vr1, $vr1, $vr9");
			asm volatile("vxor.v $vr2, $vr2, $vr10");
			asm volatile("vxor.v $vr3, $vr3, $vr11");
			/* w2$$ = MASK(wq$$); */
			asm volatile("vslti.b $vr12, $vr4, 0");
			asm volatile("vslti.b $vr13, $vr5, 0");
			asm volatile("vslti.b $vr14, $vr6, 0");
			asm volatile("vslti.b $vr15, $vr7, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("vslli.b $vr16, $vr4, 1");
			asm volatile("vslli.b $vr17, $vr5, 1");
			asm volatile("vslli.b $vr18, $vr6, 1");
			asm volatile("vslli.b $vr19, $vr7, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("vandi.b $vr12, $vr12, 0x1d");
			asm volatile("vandi.b $vr13, $vr13, 0x1d");
			asm volatile("vandi.b $vr14, $vr14, 0x1d");
			asm volatile("vandi.b $vr15, $vr15, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("vxor.v $vr16, $vr16, $vr12");
			asm volatile("vxor.v $vr17, $vr17, $vr13");
			asm volatile("vxor.v $vr18, $vr18, $vr14");
			asm volatile("vxor.v $vr19, $vr19, $vr15");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("vxor.v $vr4, $vr16, $vr8");
			asm volatile("vxor.v $vr5, $vr17, $vr9");
			asm volatile("vxor.v $vr6, $vr18, $vr10");
			asm volatile("vxor.v $vr7, $vr19, $vr11");
		}
		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
		asm volatile("vst $vr0, %0" : "=m" (p[d+NSIZE*0]));
		asm volatile("vst $vr1, %0" : "=m" (p[d+NSIZE*1]));
		asm volatile("vst $vr2, %0" : "=m" (p[d+NSIZE*2]));
		asm volatile("vst $vr3, %0" : "=m" (p[d+NSIZE*3]));
		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
		asm volatile("vst $vr4, %0" : "=m" (q[d+NSIZE*0]));
		asm volatile("vst $vr5, %0" : "=m" (q[d+NSIZE*1]));
		asm volatile("vst $vr6, %0" : "=m" (q[d+NSIZE*2]));
		asm volatile("vst $vr7, %0" : "=m" (q[d+NSIZE*3]));
	}

	kernel_fpu_end();
}

static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
				   size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */
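
	/*
	 * XOR the contribution of data disks start..stop into the existing
	 * P/Q pages. Disks above stop add nothing to this delta, so the walk
	 * starts at z0 = stop ("right side" optimization). Disks below start
	 * add nothing either, but the Q delta accumulated so far must still
	 * gain a factor of 2 in GF(2^8) per skipped disk; the multiply-only
	 * "left side" loop further down does that without loading their data.
	 */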

	kernel_fpu_begin();

	/*
	 * $vr0, $vr1, $vr2, $vr3: wp
	 * $vr4, $vr5, $vr6, $vr7: wq
	 * $vr8, $vr9, $vr10, $vr11: wd
	 * $vr12, $vr13, $vr14, $vr15: w2
	 * $vr16, $vr17, $vr18, $vr19: w1
	 */
	for (d = 0; d < bytes; d += NSIZE*4) {
		/* P/Q data pages */
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("vld $vr0, %0" : : "m" (dptr[z0][d+0*NSIZE]));
		asm volatile("vld $vr1, %0" : : "m" (dptr[z0][d+1*NSIZE]));
		asm volatile("vld $vr2, %0" : : "m" (dptr[z0][d+2*NSIZE]));
		asm volatile("vld $vr3, %0" : : "m" (dptr[z0][d+3*NSIZE]));
		asm volatile("vori.b $vr4, $vr0, 0");
		asm volatile("vori.b $vr5, $vr1, 0");
		asm volatile("vori.b $vr6, $vr2, 0");
		asm volatile("vori.b $vr7, $vr3, 0");
		for (z = z0-1; z >= start; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("vld $vr8, %0" : : "m" (dptr[z][d+0*NSIZE]));
			asm volatile("vld $vr9, %0" : : "m" (dptr[z][d+1*NSIZE]));
			asm volatile("vld $vr10, %0" : : "m" (dptr[z][d+2*NSIZE]));
			asm volatile("vld $vr11, %0" : : "m" (dptr[z][d+3*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("vxor.v $vr0, $vr0, $vr8");
			asm volatile("vxor.v $vr1, $vr1, $vr9");
			asm volatile("vxor.v $vr2, $vr2, $vr10");
			asm volatile("vxor.v $vr3, $vr3, $vr11");
			/* w2$$ = MASK(wq$$); */
			asm volatile("vslti.b $vr12, $vr4, 0");
			asm volatile("vslti.b $vr13, $vr5, 0");
			asm volatile("vslti.b $vr14, $vr6, 0");
			asm volatile("vslti.b $vr15, $vr7, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("vslli.b $vr16, $vr4, 1");
			asm volatile("vslli.b $vr17, $vr5, 1");
			asm volatile("vslli.b $vr18, $vr6, 1");
			asm volatile("vslli.b $vr19, $vr7, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("vandi.b $vr12, $vr12, 0x1d");
			asm volatile("vandi.b $vr13, $vr13, 0x1d");
			asm volatile("vandi.b $vr14, $vr14, 0x1d");
			asm volatile("vandi.b $vr15, $vr15, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("vxor.v $vr16, $vr16, $vr12");
			asm volatile("vxor.v $vr17, $vr17, $vr13");
			asm volatile("vxor.v $vr18, $vr18, $vr14");
			asm volatile("vxor.v $vr19, $vr19, $vr15");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("vxor.v $vr4, $vr16, $vr8");
			asm volatile("vxor.v $vr5, $vr17, $vr9");
			asm volatile("vxor.v $vr6, $vr18, $vr10");
			asm volatile("vxor.v $vr7, $vr19, $vr11");
		}

		/* P/Q left side optimization */
		for (z = start-1; z >= 0; z--) {
			/* w2$$ = MASK(wq$$); */
			asm volatile("vslti.b $vr12, $vr4, 0");
			asm volatile("vslti.b $vr13, $vr5, 0");
			asm volatile("vslti.b $vr14, $vr6, 0");
			asm volatile("vslti.b $vr15, $vr7, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("vslli.b $vr16, $vr4, 1");
			asm volatile("vslli.b $vr17, $vr5, 1");
			asm volatile("vslli.b $vr18, $vr6, 1");
			asm volatile("vslli.b $vr19, $vr7, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("vandi.b $vr12, $vr12, 0x1d");
			asm volatile("vandi.b $vr13, $vr13, 0x1d");
			asm volatile("vandi.b $vr14, $vr14, 0x1d");
			asm volatile("vandi.b $vr15, $vr15, 0x1d");
			/* wq$$ = w1$$ ^ w2$$; */
			asm volatile("vxor.v $vr4, $vr16, $vr12");
			asm volatile("vxor.v $vr5, $vr17, $vr13");
			asm volatile("vxor.v $vr6, $vr18, $vr14");
			asm volatile("vxor.v $vr7, $vr19, $vr15");
		}
		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 */
		asm volatile(
			"vld $vr20, %0\n\t"
			"vld $vr21, %1\n\t"
			"vld $vr22, %2\n\t"
			"vld $vr23, %3\n\t"
			"vld $vr24, %4\n\t"
			"vld $vr25, %5\n\t"
			"vld $vr26, %6\n\t"
			"vld $vr27, %7\n\t"
			"vxor.v $vr20, $vr20, $vr0\n\t"
			"vxor.v $vr21, $vr21, $vr1\n\t"
			"vxor.v $vr22, $vr22, $vr2\n\t"
			"vxor.v $vr23, $vr23, $vr3\n\t"
			"vxor.v $vr24, $vr24, $vr4\n\t"
			"vxor.v $vr25, $vr25, $vr5\n\t"
			"vxor.v $vr26, $vr26, $vr6\n\t"
			"vxor.v $vr27, $vr27, $vr7\n\t"
			"vst $vr20, %0\n\t"
			"vst $vr21, %1\n\t"
			"vst $vr22, %2\n\t"
			"vst $vr23, %3\n\t"
			"vst $vr24, %4\n\t"
			"vst $vr25, %5\n\t"
			"vst $vr26, %6\n\t"
			"vst $vr27, %7\n\t"
			: "+m" (p[d+NSIZE*0]), "+m" (p[d+NSIZE*1]),
			  "+m" (p[d+NSIZE*2]), "+m" (p[d+NSIZE*3]),
			  "+m" (q[d+NSIZE*0]), "+m" (q[d+NSIZE*1]),
			  "+m" (q[d+NSIZE*2]), "+m" (q[d+NSIZE*3])
		);
	}

	kernel_fpu_end();
}

const struct raid6_calls raid6_lsx = {
	raid6_lsx_gen_syndrome,
	raid6_lsx_xor_syndrome,
	raid6_has_lsx,
	"lsx",
	.priority = 0 /* see the comment near the top of the file for reason */
};

#undef NSIZE
#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX
#define NSIZE 32

static int raid6_has_lasx(void)
{
	return cpu_has_lasx;
}

static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	/*
	 * $xr0, $xr1: wp
	 * $xr2, $xr3: wq
	 * $xr4, $xr5: wd
	 * $xr6, $xr7: w2
	 * $xr8, $xr9: w1
	 */
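	/*
	 * Same structure as the LSX routine above, but with 256-bit vectors:
	 * two registers per working value cover 2 * NSIZE = 64 bytes per disk
	 * per iteration, the same footprint as four 128-bit LSX registers.
	 */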
	for (d = 0; d < bytes; d += NSIZE*2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("xvld $xr0, %0" : : "m" (dptr[z0][d+0*NSIZE]));
		asm volatile("xvld $xr1, %0" : : "m" (dptr[z0][d+1*NSIZE]));
		asm volatile("xvori.b $xr2, $xr0, 0");
		asm volatile("xvori.b $xr3, $xr1, 0");
		for (z = z0-1; z >= 0; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("xvld $xr4, %0" : : "m" (dptr[z][d+0*NSIZE]));
			asm volatile("xvld $xr5, %0" : : "m" (dptr[z][d+1*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("xvxor.v $xr0, $xr0, $xr4");
			asm volatile("xvxor.v $xr1, $xr1, $xr5");
			/* w2$$ = MASK(wq$$); */
			asm volatile("xvslti.b $xr6, $xr2, 0");
			asm volatile("xvslti.b $xr7, $xr3, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("xvslli.b $xr8, $xr2, 1");
			asm volatile("xvslli.b $xr9, $xr3, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("xvxor.v $xr8, $xr8, $xr6");
			asm volatile("xvxor.v $xr9, $xr9, $xr7");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("xvxor.v $xr2, $xr8, $xr4");
			asm volatile("xvxor.v $xr3, $xr9, $xr5");
		}
		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
		asm volatile("xvst $xr0, %0" : "=m" (p[d+NSIZE*0]));
		asm volatile("xvst $xr1, %0" : "=m" (p[d+NSIZE*1]));
		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
		asm volatile("xvst $xr2, %0" : "=m" (q[d+NSIZE*0]));
		asm volatile("xvst $xr3, %0" : "=m" (q[d+NSIZE*1]));
	}

	kernel_fpu_end();
}

static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
				    size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */
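
	/*
	 * See the comment in raid6_lsx_xor_syndrome() for how the right and
	 * left side optimizations avoid touching disks outside start..stop.
	 */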

	kernel_fpu_begin();

	/*
	 * $xr0, $xr1: wp
	 * $xr2, $xr3: wq
	 * $xr4, $xr5: wd
	 * $xr6, $xr7: w2
	 * $xr8, $xr9: w1
	 */
	for (d = 0; d < bytes; d += NSIZE*2) {
		/* P/Q data pages */
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("xvld $xr0, %0" : : "m" (dptr[z0][d+0*NSIZE]));
		asm volatile("xvld $xr1, %0" : : "m" (dptr[z0][d+1*NSIZE]));
		asm volatile("xvori.b $xr2, $xr0, 0");
		asm volatile("xvori.b $xr3, $xr1, 0");
		for (z = z0-1; z >= start; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("xvld $xr4, %0" : : "m" (dptr[z][d+0*NSIZE]));
			asm volatile("xvld $xr5, %0" : : "m" (dptr[z][d+1*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("xvxor.v $xr0, $xr0, $xr4");
			asm volatile("xvxor.v $xr1, $xr1, $xr5");
			/* w2$$ = MASK(wq$$); */
			asm volatile("xvslti.b $xr6, $xr2, 0");
			asm volatile("xvslti.b $xr7, $xr3, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("xvslli.b $xr8, $xr2, 1");
			asm volatile("xvslli.b $xr9, $xr3, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("xvxor.v $xr8, $xr8, $xr6");
			asm volatile("xvxor.v $xr9, $xr9, $xr7");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("xvxor.v $xr2, $xr8, $xr4");
			asm volatile("xvxor.v $xr3, $xr9, $xr5");
		}

		/* P/Q left side optimization */
		for (z = start-1; z >= 0; z--) {
			/* w2$$ = MASK(wq$$); */
			asm volatile("xvslti.b $xr6, $xr2, 0");
			asm volatile("xvslti.b $xr7, $xr3, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("xvslli.b $xr8, $xr2, 1");
			asm volatile("xvslli.b $xr9, $xr3, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
			/* wq$$ = w1$$ ^ w2$$; */
			asm volatile("xvxor.v $xr2, $xr8, $xr6");
			asm volatile("xvxor.v $xr3, $xr9, $xr7");
		}
		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 */
		asm volatile(
			"xvld $xr10, %0\n\t"
			"xvld $xr11, %1\n\t"
			"xvld $xr12, %2\n\t"
			"xvld $xr13, %3\n\t"
			"xvxor.v $xr10, $xr10, $xr0\n\t"
			"xvxor.v $xr11, $xr11, $xr1\n\t"
			"xvxor.v $xr12, $xr12, $xr2\n\t"
			"xvxor.v $xr13, $xr13, $xr3\n\t"
			"xvst $xr10, %0\n\t"
			"xvst $xr11, %1\n\t"
			"xvst $xr12, %2\n\t"
			"xvst $xr13, %3\n\t"
			: "+m" (p[d+NSIZE*0]), "+m" (p[d+NSIZE*1]),
			  "+m" (q[d+NSIZE*0]), "+m" (q[d+NSIZE*1])
		);
	}

	kernel_fpu_end();
}

const struct raid6_calls raid6_lasx = {
	raid6_lasx_gen_syndrome,
	raid6_lasx_xor_syndrome,
	raid6_has_lasx,
	"lasx",
	.priority = 0 /* see the comment near the top of the file for reason */
};
#undef NSIZE
#endif /* CONFIG_CPU_HAS_LASX */