// SPDX-License-Identifier: GPL-2.0-only
/*
 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Originally based on recov_avx2.c and recov_ssse3.c:
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */
| 12 | |
| 13 | #include <linux/raid/pq.h> |
| 14 | #include "loongarch.h" |
| 15 | |
/*
 * Unlike with the syndrome calculation algorithms, there's no boot-time
 * selection of recovery algorithms by benchmarking, so we have to specify
 * the priorities and hope the future cores will all have decent vector
 * support (i.e. no LASX slower than LSX, or even scalar code).
 */
| 22 | |
| 23 | #ifdef CONFIG_CPU_HAS_LSX |
| 24 | static int raid6_has_lsx(void) |
| 25 | { |
| 26 | return cpu_has_lsx; |
| 27 | } |
| 28 | |
| 29 | static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, |
| 30 | int failb, void **ptrs) |
| 31 | { |
| 32 | u8 *p, *q, *dp, *dq; |
| 33 | const u8 *pbmul; /* P multiplier table for B data */ |
| 34 | const u8 *qmul; /* Q multiplier table (for both) */ |
| 35 | |
| 36 | p = (u8 *)ptrs[disks - 2]; |
| 37 | q = (u8 *)ptrs[disks - 1]; |
| 38 | |
| 39 | /* |
| 40 | * Compute syndrome with zero for the missing data pages |
| 41 | * Use the dead data pages as temporary storage for |
| 42 | * delta p and delta q |
| 43 | */ |
| 44 | dp = (u8 *)ptrs[faila]; |
| 45 | ptrs[faila] = raid6_get_zero_page(); |
| 46 | ptrs[disks - 2] = dp; |
| 47 | dq = (u8 *)ptrs[failb]; |
| 48 | ptrs[failb] = raid6_get_zero_page(); |
| 49 | ptrs[disks - 1] = dq; |
| 50 | |
| 51 | raid6_call.gen_syndrome(disks, bytes, ptrs); |
| 52 | |
| 53 | /* Restore pointer table */ |
| 54 | ptrs[faila] = dp; |
| 55 | ptrs[failb] = dq; |
| 56 | ptrs[disks - 2] = p; |
| 57 | ptrs[disks - 1] = q; |
| 58 | |
| 59 | /* Now, pick the proper data tables */ |
| 60 | pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; |
| 61 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; |
| 62 | |
| 63 | kernel_fpu_begin(); |
| 64 | |
| 65 | /* |
| 66 | * vr20, vr21: qmul |
| 67 | * vr22, vr23: pbmul |
| 68 | */ |
| 69 | asm volatile("vld $vr20, %0" : : "m" (qmul[0])); |
| 70 | asm volatile("vld $vr21, %0" : : "m" (qmul[16])); |
| 71 | asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); |
| 72 | asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); |
| 73 | |
| 74 | while (bytes) { |
| 75 | /* vr4 - vr7: Q */ |
| 76 | asm volatile("vld $vr4, %0" : : "m" (q[0])); |
| 77 | asm volatile("vld $vr5, %0" : : "m" (q[16])); |
| 78 | asm volatile("vld $vr6, %0" : : "m" (q[32])); |
| 79 | asm volatile("vld $vr7, %0" : : "m" (q[48])); |
| 80 | /* vr4 - vr7: Q + Qxy */ |
| 81 | asm volatile("vld $vr8, %0" : : "m" (dq[0])); |
| 82 | asm volatile("vld $vr9, %0" : : "m" (dq[16])); |
| 83 | asm volatile("vld $vr10, %0" : : "m" (dq[32])); |
| 84 | asm volatile("vld $vr11, %0" : : "m" (dq[48])); |
| 85 | asm volatile("vxor.v $vr4, $vr4, $vr8" ); |
| 86 | asm volatile("vxor.v $vr5, $vr5, $vr9" ); |
| 87 | asm volatile("vxor.v $vr6, $vr6, $vr10" ); |
| 88 | asm volatile("vxor.v $vr7, $vr7, $vr11" ); |
| 89 | /* vr0 - vr3: P */ |
| 90 | asm volatile("vld $vr0, %0" : : "m" (p[0])); |
| 91 | asm volatile("vld $vr1, %0" : : "m" (p[16])); |
| 92 | asm volatile("vld $vr2, %0" : : "m" (p[32])); |
| 93 | asm volatile("vld $vr3, %0" : : "m" (p[48])); |
| 94 | /* vr0 - vr3: P + Pxy */ |
| 95 | asm volatile("vld $vr8, %0" : : "m" (dp[0])); |
| 96 | asm volatile("vld $vr9, %0" : : "m" (dp[16])); |
| 97 | asm volatile("vld $vr10, %0" : : "m" (dp[32])); |
| 98 | asm volatile("vld $vr11, %0" : : "m" (dp[48])); |
| 99 | asm volatile("vxor.v $vr0, $vr0, $vr8" ); |
| 100 | asm volatile("vxor.v $vr1, $vr1, $vr9" ); |
| 101 | asm volatile("vxor.v $vr2, $vr2, $vr10" ); |
| 102 | asm volatile("vxor.v $vr3, $vr3, $vr11" ); |
| 103 | |
| 104 | /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ |
| 105 | asm volatile("vsrli.b $vr8, $vr4, 4" ); |
| 106 | asm volatile("vsrli.b $vr9, $vr5, 4" ); |
| 107 | asm volatile("vsrli.b $vr10, $vr6, 4" ); |
| 108 | asm volatile("vsrli.b $vr11, $vr7, 4" ); |
| 109 | /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ |
| 110 | asm volatile("vandi.b $vr4, $vr4, 0x0f" ); |
| 111 | asm volatile("vandi.b $vr5, $vr5, 0x0f" ); |
| 112 | asm volatile("vandi.b $vr6, $vr6, 0x0f" ); |
| 113 | asm volatile("vandi.b $vr7, $vr7, 0x0f" ); |
| 114 | /* lookup from qmul[0] */ |
| 115 | asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4" ); |
| 116 | asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5" ); |
| 117 | asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6" ); |
| 118 | asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7" ); |
| 119 | /* lookup from qmul[16] */ |
| 120 | asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8" ); |
| 121 | asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9" ); |
| 122 | asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10" ); |
| 123 | asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11" ); |
| 124 | /* vr16 - vr19: B(Q + Qxy) */ |
| 125 | asm volatile("vxor.v $vr16, $vr8, $vr4" ); |
| 126 | asm volatile("vxor.v $vr17, $vr9, $vr5" ); |
| 127 | asm volatile("vxor.v $vr18, $vr10, $vr6" ); |
| 128 | asm volatile("vxor.v $vr19, $vr11, $vr7" ); |
| 129 | |
| 130 | /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ |
| 131 | asm volatile("vsrli.b $vr4, $vr0, 4" ); |
| 132 | asm volatile("vsrli.b $vr5, $vr1, 4" ); |
| 133 | asm volatile("vsrli.b $vr6, $vr2, 4" ); |
| 134 | asm volatile("vsrli.b $vr7, $vr3, 4" ); |
| 135 | /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ |
| 136 | asm volatile("vandi.b $vr12, $vr0, 0x0f" ); |
| 137 | asm volatile("vandi.b $vr13, $vr1, 0x0f" ); |
| 138 | asm volatile("vandi.b $vr14, $vr2, 0x0f" ); |
| 139 | asm volatile("vandi.b $vr15, $vr3, 0x0f" ); |
| 140 | /* lookup from pbmul[0] */ |
| 141 | asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12" ); |
| 142 | asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13" ); |
| 143 | asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14" ); |
| 144 | asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15" ); |
| 145 | /* lookup from pbmul[16] */ |
| 146 | asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4" ); |
| 147 | asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5" ); |
| 148 | asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6" ); |
| 149 | asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7" ); |
| 150 | /* vr4 - vr7: A(P + Pxy) */ |
| 151 | asm volatile("vxor.v $vr4, $vr4, $vr12" ); |
| 152 | asm volatile("vxor.v $vr5, $vr5, $vr13" ); |
| 153 | asm volatile("vxor.v $vr6, $vr6, $vr14" ); |
| 154 | asm volatile("vxor.v $vr7, $vr7, $vr15" ); |
| 155 | |
| 156 | /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ |
| 157 | asm volatile("vxor.v $vr4, $vr4, $vr16" ); |
| 158 | asm volatile("vxor.v $vr5, $vr5, $vr17" ); |
| 159 | asm volatile("vxor.v $vr6, $vr6, $vr18" ); |
| 160 | asm volatile("vxor.v $vr7, $vr7, $vr19" ); |
| 161 | asm volatile("vst $vr4, %0" : "=m" (dq[0])); |
| 162 | asm volatile("vst $vr5, %0" : "=m" (dq[16])); |
| 163 | asm volatile("vst $vr6, %0" : "=m" (dq[32])); |
| 164 | asm volatile("vst $vr7, %0" : "=m" (dq[48])); |
| 165 | |
| 166 | /* vr0 - vr3: P + Pxy + Dx = Dy */ |
| 167 | asm volatile("vxor.v $vr0, $vr0, $vr4" ); |
| 168 | asm volatile("vxor.v $vr1, $vr1, $vr5" ); |
| 169 | asm volatile("vxor.v $vr2, $vr2, $vr6" ); |
| 170 | asm volatile("vxor.v $vr3, $vr3, $vr7" ); |
| 171 | asm volatile("vst $vr0, %0" : "=m" (dp[0])); |
| 172 | asm volatile("vst $vr1, %0" : "=m" (dp[16])); |
| 173 | asm volatile("vst $vr2, %0" : "=m" (dp[32])); |
| 174 | asm volatile("vst $vr3, %0" : "=m" (dp[48])); |
| 175 | |
| 176 | bytes -= 64; |
| 177 | p += 64; |
| 178 | q += 64; |
| 179 | dp += 64; |
| 180 | dq += 64; |
| 181 | } |
| 182 | |
| 183 | kernel_fpu_end(); |
| 184 | } |
| 185 | |
| 186 | static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, |
| 187 | void **ptrs) |
| 188 | { |
| 189 | u8 *p, *q, *dq; |
| 190 | const u8 *qmul; /* Q multiplier table */ |
| 191 | |
| 192 | p = (u8 *)ptrs[disks - 2]; |
| 193 | q = (u8 *)ptrs[disks - 1]; |
| 194 | |
| 195 | /* |
| 196 | * Compute syndrome with zero for the missing data page |
| 197 | * Use the dead data page as temporary storage for delta q |
| 198 | */ |
| 199 | dq = (u8 *)ptrs[faila]; |
| 200 | ptrs[faila] = raid6_get_zero_page(); |
| 201 | ptrs[disks - 1] = dq; |
| 202 | |
| 203 | raid6_call.gen_syndrome(disks, bytes, ptrs); |
| 204 | |
| 205 | /* Restore pointer table */ |
| 206 | ptrs[faila] = dq; |
| 207 | ptrs[disks - 1] = q; |
| 208 | |
| 209 | /* Now, pick the proper data tables */ |
| 210 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; |
| 211 | |
| 212 | kernel_fpu_begin(); |
| 213 | |
| 214 | /* vr22, vr23: qmul */ |
| 215 | asm volatile("vld $vr22, %0" : : "m" (qmul[0])); |
| 216 | asm volatile("vld $vr23, %0" : : "m" (qmul[16])); |
| 217 | |
| 218 | while (bytes) { |
| 219 | /* vr0 - vr3: P + Dx */ |
| 220 | asm volatile("vld $vr0, %0" : : "m" (p[0])); |
| 221 | asm volatile("vld $vr1, %0" : : "m" (p[16])); |
| 222 | asm volatile("vld $vr2, %0" : : "m" (p[32])); |
| 223 | asm volatile("vld $vr3, %0" : : "m" (p[48])); |
| 224 | /* vr4 - vr7: Qx */ |
| 225 | asm volatile("vld $vr4, %0" : : "m" (dq[0])); |
| 226 | asm volatile("vld $vr5, %0" : : "m" (dq[16])); |
| 227 | asm volatile("vld $vr6, %0" : : "m" (dq[32])); |
| 228 | asm volatile("vld $vr7, %0" : : "m" (dq[48])); |
| 229 | /* vr4 - vr7: Q + Qx */ |
| 230 | asm volatile("vld $vr8, %0" : : "m" (q[0])); |
| 231 | asm volatile("vld $vr9, %0" : : "m" (q[16])); |
| 232 | asm volatile("vld $vr10, %0" : : "m" (q[32])); |
| 233 | asm volatile("vld $vr11, %0" : : "m" (q[48])); |
| 234 | asm volatile("vxor.v $vr4, $vr4, $vr8" ); |
| 235 | asm volatile("vxor.v $vr5, $vr5, $vr9" ); |
| 236 | asm volatile("vxor.v $vr6, $vr6, $vr10" ); |
| 237 | asm volatile("vxor.v $vr7, $vr7, $vr11" ); |
| 238 | |
| 239 | /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ |
| 240 | asm volatile("vsrli.b $vr8, $vr4, 4" ); |
| 241 | asm volatile("vsrli.b $vr9, $vr5, 4" ); |
| 242 | asm volatile("vsrli.b $vr10, $vr6, 4" ); |
| 243 | asm volatile("vsrli.b $vr11, $vr7, 4" ); |
| 244 | /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ |
| 245 | asm volatile("vandi.b $vr4, $vr4, 0x0f" ); |
| 246 | asm volatile("vandi.b $vr5, $vr5, 0x0f" ); |
| 247 | asm volatile("vandi.b $vr6, $vr6, 0x0f" ); |
| 248 | asm volatile("vandi.b $vr7, $vr7, 0x0f" ); |
| 249 | /* lookup from qmul[0] */ |
| 250 | asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4" ); |
| 251 | asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5" ); |
| 252 | asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6" ); |
| 253 | asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7" ); |
| 254 | /* lookup from qmul[16] */ |
| 255 | asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8" ); |
| 256 | asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9" ); |
| 257 | asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10" ); |
| 258 | asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11" ); |
| 259 | /* vr4 - vr7: qmul(Q + Qx) = Dx */ |
| 260 | asm volatile("vxor.v $vr4, $vr4, $vr8" ); |
| 261 | asm volatile("vxor.v $vr5, $vr5, $vr9" ); |
| 262 | asm volatile("vxor.v $vr6, $vr6, $vr10" ); |
| 263 | asm volatile("vxor.v $vr7, $vr7, $vr11" ); |
| 264 | asm volatile("vst $vr4, %0" : "=m" (dq[0])); |
| 265 | asm volatile("vst $vr5, %0" : "=m" (dq[16])); |
| 266 | asm volatile("vst $vr6, %0" : "=m" (dq[32])); |
| 267 | asm volatile("vst $vr7, %0" : "=m" (dq[48])); |
| 268 | |
| 269 | /* vr0 - vr3: P + Dx + Dx = P */ |
| 270 | asm volatile("vxor.v $vr0, $vr0, $vr4" ); |
| 271 | asm volatile("vxor.v $vr1, $vr1, $vr5" ); |
| 272 | asm volatile("vxor.v $vr2, $vr2, $vr6" ); |
| 273 | asm volatile("vxor.v $vr3, $vr3, $vr7" ); |
| 274 | asm volatile("vst $vr0, %0" : "=m" (p[0])); |
| 275 | asm volatile("vst $vr1, %0" : "=m" (p[16])); |
| 276 | asm volatile("vst $vr2, %0" : "=m" (p[32])); |
| 277 | asm volatile("vst $vr3, %0" : "=m" (p[48])); |
| 278 | |
| 279 | bytes -= 64; |
| 280 | p += 64; |
| 281 | q += 64; |
| 282 | dq += 64; |
| 283 | } |
| 284 | |
| 285 | kernel_fpu_end(); |
| 286 | } |
| 287 | |
| 288 | const struct raid6_recov_calls raid6_recov_lsx = { |
| 289 | .data2 = raid6_2data_recov_lsx, |
| 290 | .datap = raid6_datap_recov_lsx, |
| 291 | .valid = raid6_has_lsx, |
| 292 | .name = "lsx" , |
| 293 | .priority = 1, |
| 294 | }; |
| 295 | #endif /* CONFIG_CPU_HAS_LSX */ |
| 296 | |
| 297 | #ifdef CONFIG_CPU_HAS_LASX |
| 298 | static int raid6_has_lasx(void) |
| 299 | { |
| 300 | return cpu_has_lasx; |
| 301 | } |
| 302 | |
| 303 | static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, |
| 304 | int failb, void **ptrs) |
| 305 | { |
| 306 | u8 *p, *q, *dp, *dq; |
| 307 | const u8 *pbmul; /* P multiplier table for B data */ |
| 308 | const u8 *qmul; /* Q multiplier table (for both) */ |
| 309 | |
| 310 | p = (u8 *)ptrs[disks - 2]; |
| 311 | q = (u8 *)ptrs[disks - 1]; |
| 312 | |
| 313 | /* |
| 314 | * Compute syndrome with zero for the missing data pages |
| 315 | * Use the dead data pages as temporary storage for |
| 316 | * delta p and delta q |
| 317 | */ |
| 318 | dp = (u8 *)ptrs[faila]; |
| 319 | ptrs[faila] = raid6_get_zero_page(); |
| 320 | ptrs[disks - 2] = dp; |
| 321 | dq = (u8 *)ptrs[failb]; |
| 322 | ptrs[failb] = raid6_get_zero_page(); |
| 323 | ptrs[disks - 1] = dq; |
| 324 | |
| 325 | raid6_call.gen_syndrome(disks, bytes, ptrs); |
| 326 | |
| 327 | /* Restore pointer table */ |
| 328 | ptrs[faila] = dp; |
| 329 | ptrs[failb] = dq; |
| 330 | ptrs[disks - 2] = p; |
| 331 | ptrs[disks - 1] = q; |
| 332 | |
| 333 | /* Now, pick the proper data tables */ |
| 334 | pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; |
| 335 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; |
| 336 | |
| 337 | kernel_fpu_begin(); |
| 338 | |
| 339 | /* |
| 340 | * xr20, xr21: qmul |
| 341 | * xr22, xr23: pbmul |
| 342 | */ |
| 343 | asm volatile("vld $vr20, %0" : : "m" (qmul[0])); |
| 344 | asm volatile("vld $vr21, %0" : : "m" (qmul[16])); |
| 345 | asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); |
| 346 | asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); |
| 347 | asm volatile("xvreplve0.q $xr20, $xr20" ); |
| 348 | asm volatile("xvreplve0.q $xr21, $xr21" ); |
| 349 | asm volatile("xvreplve0.q $xr22, $xr22" ); |
| 350 | asm volatile("xvreplve0.q $xr23, $xr23" ); |
| 351 | |
| 352 | while (bytes) { |
| 353 | /* xr0, xr1: Q */ |
| 354 | asm volatile("xvld $xr0, %0" : : "m" (q[0])); |
| 355 | asm volatile("xvld $xr1, %0" : : "m" (q[32])); |
| 356 | /* xr0, xr1: Q + Qxy */ |
| 357 | asm volatile("xvld $xr4, %0" : : "m" (dq[0])); |
| 358 | asm volatile("xvld $xr5, %0" : : "m" (dq[32])); |
| 359 | asm volatile("xvxor.v $xr0, $xr0, $xr4" ); |
| 360 | asm volatile("xvxor.v $xr1, $xr1, $xr5" ); |
| 361 | /* xr2, xr3: P */ |
| 362 | asm volatile("xvld $xr2, %0" : : "m" (p[0])); |
| 363 | asm volatile("xvld $xr3, %0" : : "m" (p[32])); |
| 364 | /* xr2, xr3: P + Pxy */ |
| 365 | asm volatile("xvld $xr4, %0" : : "m" (dp[0])); |
| 366 | asm volatile("xvld $xr5, %0" : : "m" (dp[32])); |
| 367 | asm volatile("xvxor.v $xr2, $xr2, $xr4" ); |
| 368 | asm volatile("xvxor.v $xr3, $xr3, $xr5" ); |
| 369 | |
| 370 | /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ |
| 371 | asm volatile("xvsrli.b $xr4, $xr0, 4" ); |
| 372 | asm volatile("xvsrli.b $xr5, $xr1, 4" ); |
| 373 | /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ |
| 374 | asm volatile("xvandi.b $xr0, $xr0, 0x0f" ); |
| 375 | asm volatile("xvandi.b $xr1, $xr1, 0x0f" ); |
| 376 | /* lookup from qmul[0] */ |
| 377 | asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0" ); |
| 378 | asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1" ); |
| 379 | /* lookup from qmul[16] */ |
| 380 | asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4" ); |
| 381 | asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5" ); |
| 382 | /* xr6, xr7: B(Q + Qxy) */ |
| 383 | asm volatile("xvxor.v $xr6, $xr4, $xr0" ); |
| 384 | asm volatile("xvxor.v $xr7, $xr5, $xr1" ); |
| 385 | |
| 386 | /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ |
| 387 | asm volatile("xvsrli.b $xr4, $xr2, 4" ); |
| 388 | asm volatile("xvsrli.b $xr5, $xr3, 4" ); |
| 389 | /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ |
| 390 | asm volatile("xvandi.b $xr0, $xr2, 0x0f" ); |
| 391 | asm volatile("xvandi.b $xr1, $xr3, 0x0f" ); |
| 392 | /* lookup from pbmul[0] */ |
| 393 | asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0" ); |
| 394 | asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1" ); |
| 395 | /* lookup from pbmul[16] */ |
| 396 | asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4" ); |
| 397 | asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5" ); |
| 398 | /* xr0, xr1: A(P + Pxy) */ |
| 399 | asm volatile("xvxor.v $xr0, $xr0, $xr4" ); |
| 400 | asm volatile("xvxor.v $xr1, $xr1, $xr5" ); |
| 401 | |
| 402 | /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ |
| 403 | asm volatile("xvxor.v $xr0, $xr0, $xr6" ); |
| 404 | asm volatile("xvxor.v $xr1, $xr1, $xr7" ); |
| 405 | |
| 406 | /* xr2, xr3: P + Pxy + Dx = Dy */ |
| 407 | asm volatile("xvxor.v $xr2, $xr2, $xr0" ); |
| 408 | asm volatile("xvxor.v $xr3, $xr3, $xr1" ); |
| 409 | |
| 410 | asm volatile("xvst $xr0, %0" : "=m" (dq[0])); |
| 411 | asm volatile("xvst $xr1, %0" : "=m" (dq[32])); |
| 412 | asm volatile("xvst $xr2, %0" : "=m" (dp[0])); |
| 413 | asm volatile("xvst $xr3, %0" : "=m" (dp[32])); |
| 414 | |
| 415 | bytes -= 64; |
| 416 | p += 64; |
| 417 | q += 64; |
| 418 | dp += 64; |
| 419 | dq += 64; |
| 420 | } |
| 421 | |
| 422 | kernel_fpu_end(); |
| 423 | } |
| 424 | |
| 425 | static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, |
| 426 | void **ptrs) |
| 427 | { |
| 428 | u8 *p, *q, *dq; |
| 429 | const u8 *qmul; /* Q multiplier table */ |
| 430 | |
| 431 | p = (u8 *)ptrs[disks - 2]; |
| 432 | q = (u8 *)ptrs[disks - 1]; |
| 433 | |
| 434 | /* |
| 435 | * Compute syndrome with zero for the missing data page |
| 436 | * Use the dead data page as temporary storage for delta q |
| 437 | */ |
| 438 | dq = (u8 *)ptrs[faila]; |
| 439 | ptrs[faila] = raid6_get_zero_page(); |
| 440 | ptrs[disks - 1] = dq; |
| 441 | |
| 442 | raid6_call.gen_syndrome(disks, bytes, ptrs); |
| 443 | |
| 444 | /* Restore pointer table */ |
| 445 | ptrs[faila] = dq; |
| 446 | ptrs[disks - 1] = q; |
| 447 | |
| 448 | /* Now, pick the proper data tables */ |
| 449 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; |
| 450 | |
| 451 | kernel_fpu_begin(); |
| 452 | |
| 453 | /* xr22, xr23: qmul */ |
| 454 | asm volatile("vld $vr22, %0" : : "m" (qmul[0])); |
| 455 | asm volatile("xvreplve0.q $xr22, $xr22" ); |
| 456 | asm volatile("vld $vr23, %0" : : "m" (qmul[16])); |
| 457 | asm volatile("xvreplve0.q $xr23, $xr23" ); |
| 458 | |
| 459 | while (bytes) { |
| 460 | /* xr0, xr1: P + Dx */ |
| 461 | asm volatile("xvld $xr0, %0" : : "m" (p[0])); |
| 462 | asm volatile("xvld $xr1, %0" : : "m" (p[32])); |
| 463 | /* xr2, xr3: Qx */ |
| 464 | asm volatile("xvld $xr2, %0" : : "m" (dq[0])); |
| 465 | asm volatile("xvld $xr3, %0" : : "m" (dq[32])); |
| 466 | /* xr2, xr3: Q + Qx */ |
| 467 | asm volatile("xvld $xr4, %0" : : "m" (q[0])); |
| 468 | asm volatile("xvld $xr5, %0" : : "m" (q[32])); |
| 469 | asm volatile("xvxor.v $xr2, $xr2, $xr4" ); |
| 470 | asm volatile("xvxor.v $xr3, $xr3, $xr5" ); |
| 471 | |
| 472 | /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ |
| 473 | asm volatile("xvsrli.b $xr4, $xr2, 4" ); |
| 474 | asm volatile("xvsrli.b $xr5, $xr3, 4" ); |
| 475 | /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ |
| 476 | asm volatile("xvandi.b $xr2, $xr2, 0x0f" ); |
| 477 | asm volatile("xvandi.b $xr3, $xr3, 0x0f" ); |
| 478 | /* lookup from qmul[0] */ |
| 479 | asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2" ); |
| 480 | asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3" ); |
| 481 | /* lookup from qmul[16] */ |
| 482 | asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4" ); |
| 483 | asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5" ); |
| 484 | /* xr2, xr3: qmul(Q + Qx) = Dx */ |
| 485 | asm volatile("xvxor.v $xr2, $xr2, $xr4" ); |
| 486 | asm volatile("xvxor.v $xr3, $xr3, $xr5" ); |
| 487 | |
| 488 | /* xr0, xr1: P + Dx + Dx = P */ |
| 489 | asm volatile("xvxor.v $xr0, $xr0, $xr2" ); |
| 490 | asm volatile("xvxor.v $xr1, $xr1, $xr3" ); |
| 491 | |
| 492 | asm volatile("xvst $xr2, %0" : "=m" (dq[0])); |
| 493 | asm volatile("xvst $xr3, %0" : "=m" (dq[32])); |
| 494 | asm volatile("xvst $xr0, %0" : "=m" (p[0])); |
| 495 | asm volatile("xvst $xr1, %0" : "=m" (p[32])); |
| 496 | |
| 497 | bytes -= 64; |
| 498 | p += 64; |
| 499 | q += 64; |
| 500 | dq += 64; |
| 501 | } |
| 502 | |
| 503 | kernel_fpu_end(); |
| 504 | } |
| 505 | |
| 506 | const struct raid6_recov_calls raid6_recov_lasx = { |
| 507 | .data2 = raid6_2data_recov_lasx, |
| 508 | .datap = raid6_datap_recov_lasx, |
| 509 | .valid = raid6_has_lasx, |
| 510 | .name = "lasx" , |
| 511 | .priority = 2, |
| 512 | }; |
| 513 | #endif /* CONFIG_CPU_HAS_LASX */ |
| 514 | |