// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
7 | |
8 | #include "xor_simd.h" |
9 | |
/*
 * Each loop iteration handles one 64-byte cache line. The figure assumes
 * that upcoming mainstream LoongArch cores keep performance characteristics
 * close to those of today's models.
 */
#define LINE_WIDTH	64
16 | |
#ifdef CONFIG_CPU_HAS_LSX

/*
 * LSX instantiation of xor_template.c.
 *
 * The macros below expand to string literals holding assembly text;
 * "%[ptr]" names an inline-asm operand that the included template is
 * expected to supply. They exist only for the duration of the #include
 * and are removed again right after it.
 */

/* Load vector register $vr<idx> from %[ptr] + off. */
#define LD(idx, ptr, off)						\
	"vld $vr" #idx ", %[" #ptr "], " #off "\n\t"

/* Store vector register $vr<idx> to %[ptr] + off. */
#define ST(idx, ptr, off)						\
	"vst $vr" #idx ", %[" #ptr "], " #off "\n\t"

/* $vr<acc> = $vr<acc> ^ $vr<src> */
#define XOR(acc, src)							\
	"vxor.v $vr" #acc ", $vr" #acc ", $vr" #src "\n\t"

/*
 * Load the line at %[ptr] into $vr0-$vr3 (16-byte steps), i.e. the
 * registers that ST_LINE() writes back.
 */
#define LD_INOUT_LINE(ptr)						\
	LD(0, ptr, 0)							\
	LD(1, ptr, 16)							\
	LD(2, ptr, 32)							\
	LD(3, ptr, 48)

/* Load the line at %[ptr] into $vr4-$vr7, then fold it into $vr0-$vr3. */
#define LD_AND_XOR_LINE(ptr)						\
	LD(4, ptr, 0)							\
	LD(5, ptr, 16)							\
	LD(6, ptr, 32)							\
	LD(7, ptr, 48)							\
	XOR(0, 4)							\
	XOR(1, 5)							\
	XOR(2, 6)							\
	XOR(3, 7)

/* Store $vr0-$vr3 to the line at %[ptr]. */
#define ST_LINE(ptr)							\
	ST(0, ptr, 0)							\
	ST(1, ptr, 16)							\
	ST(2, ptr, 32)							\
	ST(3, ptr, 48)

/* Naming hook for the template: XOR_FUNC_NAME(2) -> __xor_lsx_2. */
#define XOR_FUNC_NAME(n) __xor_lsx_##n
#include "xor_template.c"

/* Drop everything again so another flavour can redefine the same names. */
#undef XOR_FUNC_NAME
#undef ST_LINE
#undef LD_AND_XOR_LINE
#undef LD_INOUT_LINE
#undef XOR
#undef ST
#undef LD

#endif /* CONFIG_CPU_HAS_LSX */
59 | |
#ifdef CONFIG_CPU_HAS_LASX

/*
 * LASX instantiation of xor_template.c.
 *
 * The macros below expand to string literals holding assembly text;
 * "%[ptr]" names an inline-asm operand that the included template is
 * expected to supply. They exist only for the duration of the #include
 * and are removed again right after it.
 */

/* Load vector register $xr<idx> from %[ptr] + off. */
#define LD(idx, ptr, off)						\
	"xvld $xr" #idx ", %[" #ptr "], " #off "\n\t"

/* Store vector register $xr<idx> to %[ptr] + off. */
#define ST(idx, ptr, off)						\
	"xvst $xr" #idx ", %[" #ptr "], " #off "\n\t"

/* $xr<acc> = $xr<acc> ^ $xr<src> */
#define XOR(acc, src)							\
	"xvxor.v $xr" #acc ", $xr" #acc ", $xr" #src "\n\t"

/*
 * Load the line at %[ptr] into $xr0-$xr1 (32-byte steps), i.e. the
 * registers that ST_LINE() writes back.
 */
#define LD_INOUT_LINE(ptr)						\
	LD(0, ptr, 0)							\
	LD(1, ptr, 32)

/* Load the line at %[ptr] into $xr2-$xr3, then fold it into $xr0-$xr1. */
#define LD_AND_XOR_LINE(ptr)						\
	LD(2, ptr, 0)							\
	LD(3, ptr, 32)							\
	XOR(0, 2)							\
	XOR(1, 3)

/* Store $xr0-$xr1 to the line at %[ptr]. */
#define ST_LINE(ptr)							\
	ST(0, ptr, 0)							\
	ST(1, ptr, 32)

/* Naming hook for the template: XOR_FUNC_NAME(2) -> __xor_lasx_2. */
#define XOR_FUNC_NAME(n) __xor_lasx_##n
#include "xor_template.c"

/* Drop everything again so the names do not leak past this section. */
#undef XOR_FUNC_NAME
#undef ST_LINE
#undef LD_AND_XOR_LINE
#undef LD_INOUT_LINE
#undef XOR
#undef ST
#undef LD

#endif /* CONFIG_CPU_HAS_LASX */
94 | |