// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Template for XOR operations, instantiated in xor_simd.c.
 *
 * Expected preprocessor definitions:
 *
 * - LINE_WIDTH
 * - XOR_FUNC_NAME(nr)
 * - LD_INOUT_LINE(buf)
 * - LD_AND_XOR_LINE(buf)
 * - ST_LINE(buf)
 */
15
16void XOR_FUNC_NAME(2)(unsigned long bytes,
17 unsigned long * __restrict v1,
18 const unsigned long * __restrict v2)
19{
20 unsigned long lines = bytes / LINE_WIDTH;
21
22 do {
23 __asm__ __volatile__ (
24 LD_INOUT_LINE(v1)
25 LD_AND_XOR_LINE(v2)
26 ST_LINE(v1)
27 : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
28 );
29
30 v1 += LINE_WIDTH / sizeof(unsigned long);
31 v2 += LINE_WIDTH / sizeof(unsigned long);
32 } while (--lines > 0);
33}
34
35void XOR_FUNC_NAME(3)(unsigned long bytes,
36 unsigned long * __restrict v1,
37 const unsigned long * __restrict v2,
38 const unsigned long * __restrict v3)
39{
40 unsigned long lines = bytes / LINE_WIDTH;
41
42 do {
43 __asm__ __volatile__ (
44 LD_INOUT_LINE(v1)
45 LD_AND_XOR_LINE(v2)
46 LD_AND_XOR_LINE(v3)
47 ST_LINE(v1)
48 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
49 );
50
51 v1 += LINE_WIDTH / sizeof(unsigned long);
52 v2 += LINE_WIDTH / sizeof(unsigned long);
53 v3 += LINE_WIDTH / sizeof(unsigned long);
54 } while (--lines > 0);
55}
56
57void XOR_FUNC_NAME(4)(unsigned long bytes,
58 unsigned long * __restrict v1,
59 const unsigned long * __restrict v2,
60 const unsigned long * __restrict v3,
61 const unsigned long * __restrict v4)
62{
63 unsigned long lines = bytes / LINE_WIDTH;
64
65 do {
66 __asm__ __volatile__ (
67 LD_INOUT_LINE(v1)
68 LD_AND_XOR_LINE(v2)
69 LD_AND_XOR_LINE(v3)
70 LD_AND_XOR_LINE(v4)
71 ST_LINE(v1)
72 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
73 : "memory"
74 );
75
76 v1 += LINE_WIDTH / sizeof(unsigned long);
77 v2 += LINE_WIDTH / sizeof(unsigned long);
78 v3 += LINE_WIDTH / sizeof(unsigned long);
79 v4 += LINE_WIDTH / sizeof(unsigned long);
80 } while (--lines > 0);
81}
82
83void XOR_FUNC_NAME(5)(unsigned long bytes,
84 unsigned long * __restrict v1,
85 const unsigned long * __restrict v2,
86 const unsigned long * __restrict v3,
87 const unsigned long * __restrict v4,
88 const unsigned long * __restrict v5)
89{
90 unsigned long lines = bytes / LINE_WIDTH;
91
92 do {
93 __asm__ __volatile__ (
94 LD_INOUT_LINE(v1)
95 LD_AND_XOR_LINE(v2)
96 LD_AND_XOR_LINE(v3)
97 LD_AND_XOR_LINE(v4)
98 LD_AND_XOR_LINE(v5)
99 ST_LINE(v1)
100 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
101 [v5] "r"(v5) : "memory"
102 );
103
104 v1 += LINE_WIDTH / sizeof(unsigned long);
105 v2 += LINE_WIDTH / sizeof(unsigned long);
106 v3 += LINE_WIDTH / sizeof(unsigned long);
107 v4 += LINE_WIDTH / sizeof(unsigned long);
108 v5 += LINE_WIDTH / sizeof(unsigned long);
109 } while (--lines > 0);
110}
111

/* Source: linux/arch/loongarch/lib/xor_template.c */