1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> |
4 | * |
5 | * Template for XOR operations, instantiated in xor_simd.c. |
6 | * |
7 | * Expected preprocessor definitions: |
8 | * |
9 | * - LINE_WIDTH |
10 | * - XOR_FUNC_NAME(nr) |
11 | * - LD_INOUT_LINE(buf) |
12 | * - LD_AND_XOR_LINE(buf) |
13 | * - ST_LINE(buf) |
14 | */ |
15 | |
16 | void XOR_FUNC_NAME(2)(unsigned long bytes, |
17 | unsigned long * __restrict v1, |
18 | const unsigned long * __restrict v2) |
19 | { |
20 | unsigned long lines = bytes / LINE_WIDTH; |
21 | |
22 | do { |
23 | __asm__ __volatile__ ( |
24 | LD_INOUT_LINE(v1) |
25 | LD_AND_XOR_LINE(v2) |
26 | ST_LINE(v1) |
27 | : : [v1] "r" (v1), [v2] "r" (v2) : "memory" |
28 | ); |
29 | |
30 | v1 += LINE_WIDTH / sizeof(unsigned long); |
31 | v2 += LINE_WIDTH / sizeof(unsigned long); |
32 | } while (--lines > 0); |
33 | } |
34 | |
35 | void XOR_FUNC_NAME(3)(unsigned long bytes, |
36 | unsigned long * __restrict v1, |
37 | const unsigned long * __restrict v2, |
38 | const unsigned long * __restrict v3) |
39 | { |
40 | unsigned long lines = bytes / LINE_WIDTH; |
41 | |
42 | do { |
43 | __asm__ __volatile__ ( |
44 | LD_INOUT_LINE(v1) |
45 | LD_AND_XOR_LINE(v2) |
46 | LD_AND_XOR_LINE(v3) |
47 | ST_LINE(v1) |
48 | : : [v1] "r" (v1), [v2] "r" (v2), [v3] "r" (v3) : "memory" |
49 | ); |
50 | |
51 | v1 += LINE_WIDTH / sizeof(unsigned long); |
52 | v2 += LINE_WIDTH / sizeof(unsigned long); |
53 | v3 += LINE_WIDTH / sizeof(unsigned long); |
54 | } while (--lines > 0); |
55 | } |
56 | |
57 | void XOR_FUNC_NAME(4)(unsigned long bytes, |
58 | unsigned long * __restrict v1, |
59 | const unsigned long * __restrict v2, |
60 | const unsigned long * __restrict v3, |
61 | const unsigned long * __restrict v4) |
62 | { |
63 | unsigned long lines = bytes / LINE_WIDTH; |
64 | |
65 | do { |
66 | __asm__ __volatile__ ( |
67 | LD_INOUT_LINE(v1) |
68 | LD_AND_XOR_LINE(v2) |
69 | LD_AND_XOR_LINE(v3) |
70 | LD_AND_XOR_LINE(v4) |
71 | ST_LINE(v1) |
72 | : : [v1] "r" (v1), [v2] "r" (v2), [v3] "r" (v3), [v4] "r" (v4) |
73 | : "memory" |
74 | ); |
75 | |
76 | v1 += LINE_WIDTH / sizeof(unsigned long); |
77 | v2 += LINE_WIDTH / sizeof(unsigned long); |
78 | v3 += LINE_WIDTH / sizeof(unsigned long); |
79 | v4 += LINE_WIDTH / sizeof(unsigned long); |
80 | } while (--lines > 0); |
81 | } |
82 | |
83 | void XOR_FUNC_NAME(5)(unsigned long bytes, |
84 | unsigned long * __restrict v1, |
85 | const unsigned long * __restrict v2, |
86 | const unsigned long * __restrict v3, |
87 | const unsigned long * __restrict v4, |
88 | const unsigned long * __restrict v5) |
89 | { |
90 | unsigned long lines = bytes / LINE_WIDTH; |
91 | |
92 | do { |
93 | __asm__ __volatile__ ( |
94 | LD_INOUT_LINE(v1) |
95 | LD_AND_XOR_LINE(v2) |
96 | LD_AND_XOR_LINE(v3) |
97 | LD_AND_XOR_LINE(v4) |
98 | LD_AND_XOR_LINE(v5) |
99 | ST_LINE(v1) |
100 | : : [v1] "r" (v1), [v2] "r" (v2), [v3] "r" (v3), [v4] "r" (v4), |
101 | [v5] "r" (v5) : "memory" |
102 | ); |
103 | |
104 | v1 += LINE_WIDTH / sizeof(unsigned long); |
105 | v2 += LINE_WIDTH / sizeof(unsigned long); |
106 | v3 += LINE_WIDTH / sizeof(unsigned long); |
107 | v4 += LINE_WIDTH / sizeof(unsigned long); |
108 | v5 += LINE_WIDTH / sizeof(unsigned long); |
109 | } while (--lines > 0); |
110 | } |
111 | |