/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
7 | |
8 | #include <linux/linkage.h> |
9 | #include <asm/alternative.h> |
10 | #include <asm/assembler.h> |
11 | |
12 | .arch armv8-a+crc |
13 | |
	/*
	 * Byte-swap a 32/64-bit register so the data is consumed in a
	 * fixed byte order regardless of the kernel's endianness.
	 * \be=0: swap only when building big-endian (CPU_BE).
	 * \be=1: swap only when building little-endian (CPU_LE).
	 */
	.macro byteorder, reg, be
	.if \be
CPU_LE( rev		\reg, \reg	)	// LE kernel, BE data: swap
	.else
CPU_BE( rev		\reg, \reg	)	// BE kernel, LE data: swap
	.endif
	.endm
21 | |
	/*
	 * 16-bit variant of byteorder: swap the bytes within each
	 * halfword, conditional on the kernel endianness vs \be.
	 */
	.macro byteorder16, reg, be
	.if \be
CPU_LE( rev16		\reg, \reg	)	// LE kernel, BE data: swap halfword
	.else
CPU_BE( rev16		\reg, \reg	)	// BE kernel, LE data: swap halfword
	.endif
	.endm
29 | |
	/*
	 * Bit-reverse the whole register, but only for the \be=1
	 * (crc32_be) flavour. The CPU only has little-endian CRC
	 * instructions, so the BE entry point bit-reverses its inputs
	 * and CRC value around them.
	 */
	.macro bitorder, reg, be
	.if \be
	rbit		\reg, \reg
	.endif
	.endm
35 | |
	/*
	 * Bit-reverse a 16-bit value held in the low half of \reg
	 * (\be=1 only). rbit reverses all 32 bits, leaving the
	 * reversed halfword in bits [31:16]; shift it back down.
	 */
	.macro bitorder16, reg, be
	.if \be
	rbit		\reg, \reg
	lsr		\reg, \reg, #16
	.endif
	.endm
42 | |
	/*
	 * Bit-reverse an 8-bit value held in the low byte of \reg
	 * (\be=1 only). rbit moves the reversed byte to bits [31:24];
	 * shift it back down.
	 */
	.macro bitorder8, reg, be
	.if \be
	rbit		\reg, \reg
	lsr		\reg, \reg, #24
	.endif
	.endm
49 | |
	/*
	 * Core CRC32 routine, instantiated by all three entry points.
	 *
	 * In:	w0 = initial CRC value
	 *	x1 = buffer
	 *	x2 = length in bytes
	 * Out:	w0 = updated CRC
	 * Clobbers: x3-x8, flags
	 *
	 * \c  = instruction suffix: "" for CRC32, "c" for CRC32C
	 * \be = 1 for the big-endian flavour: inputs and the CRC are
	 *       bit-reversed (rbit) around the LE-only CRC instructions
	 */
	.macro __crc32, c, be=0
	bitorder	w0, \be
	cmp		x2, #16
	b.lt		8f			// less than 16 bytes

	and		x7, x2, #0x1f		// x7 = len % 32 (head bytes)
	and		x2, x2, #~0x1f		// x2 = len rounded down to 32
	cbz		x7, 32f			// multiple of 32 bytes

	/*
	 * Consume the 1..31 byte head up front so the main loop only
	 * sees whole 32-byte chunks. Two 16-byte loads (which may
	 * overlap when x7 < 16) cover the head; the tst/csel chains
	 * below fold in exactly the bytes selected by each bit of x7,
	 * leaving the CRC untouched when a bit is clear.
	 */
	and		x8, x7, #0xf
	ldp		x3, x4, [x1]		// first 16 bytes of head
	add		x8, x8, x1
	add		x1, x1, x7		// advance buffer past the head
	ldp		x5, x6, [x8]		// final 16 head bytes (may overlap)
	byteorder	x3, \be
	byteorder	x4, \be
	byteorder	x5, \be
	byteorder	x6, \be
	bitorder	x3, \be
	bitorder	x4, \be
	bitorder	x5, \be
	bitorder	x6, \be

	tst		x7, #8			// 8 head bytes?
	crc32\c\()x	w8, w0, x3		// speculatively fold 8 bytes
	csel		x3, x3, x4, eq		// if not taken, keep going with x4
	csel		w0, w0, w8, eq		// commit CRC only if bit 3 set
	tst		x7, #4			// 4 head bytes?
	lsr		x4, x3, #32		// unconsumed upper word
	crc32\c\()w	w8, w0, w3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #2			// 2 head bytes?
	lsr		w4, w3, #16		// unconsumed upper halfword
	crc32\c\()h	w8, w0, w3
	csel		w3, w3, w4, eq
	csel		w0, w0, w8, eq
	tst		x7, #1			// 1 head byte?
	crc32\c\()b	w8, w0, w3
	csel		w0, w0, w8, eq
	tst		x7, #16			// 16 head bytes (x5/x6)?
	crc32\c\()x	w8, w0, x5
	crc32\c\()x	w8, w8, x6
	csel		w0, w0, w8, eq
	cbz		x2, 0f			// no whole 32-byte chunks left

	/* Main loop: fold 32 bytes per iteration. */
32:	ldp		x3, x4, [x1], #32
	sub		x2, x2, #32
	ldp		x5, x6, [x1, #-16]
	byteorder	x3, \be
	byteorder	x4, \be
	byteorder	x5, \be
	byteorder	x6, \be
	bitorder	x3, \be
	bitorder	x4, \be
	bitorder	x5, \be
	bitorder	x6, \be
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	crc32\c\()x	w0, w0, x5
	crc32\c\()x	w0, w0, x6
	cbnz		x2, 32b
0:	bitorder	w0, \be			// undo the BE bit-reversal
	ret

	/* Short input (< 16 bytes): test each length bit, 8/4/2/1. */
8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
	byteorder	x3, \be
	bitorder	x3, \be
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
	byteorder	w3, \be
	bitorder	w3, \be
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
	byteorder16	w3, \be
	bitorder16	w3, \be
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	bitorder8	w3, \be
	crc32\c\()b	w0, w0, w3
0:	bitorder	w0, \be			// undo the BE bit-reversal
	ret
	.endm
137 | |
	/*
	 * u32 crc32_le(u32 crc, unsigned char const *p, size_t len)
	 *
	 * Patched at boot by the alternatives framework: on CPUs
	 * without the CRC32 feature the branch to the generic
	 * crc32_le_base fallback is left in place; otherwise it is
	 * NOPed out and the accelerated __crc32 body runs.
	 */
	.align		5
SYM_FUNC_START(crc32_le)
alternative_if_not ARM64_HAS_CRC32
	b		crc32_le_base		// no CRC32 insns: use fallback
alternative_else_nop_endif
	__crc32
SYM_FUNC_END(crc32_le)
145 | |
	/*
	 * u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len)
	 *
	 * CRC32C (Castagnoli) variant: instantiates __crc32 with the
	 * "c" suffix so the crc32c{x,w,h,b} instructions are used.
	 * Falls back to __crc32c_le_base when the CPU lacks CRC32.
	 */
	.align		5
SYM_FUNC_START(__crc32c_le)
alternative_if_not ARM64_HAS_CRC32
	b		__crc32c_le_base	// no CRC32 insns: use fallback
alternative_else_nop_endif
	__crc32		c
SYM_FUNC_END(__crc32c_le)
153 | |
	/*
	 * u32 crc32_be(u32 crc, unsigned char const *p, size_t len)
	 *
	 * Big-endian (MSB-first) CRC32. The CPU only provides
	 * LSB-first CRC instructions, so __crc32 is instantiated with
	 * be=1, bit-reversing data and CRC around them. Falls back to
	 * crc32_be_base when the CPU lacks CRC32.
	 */
	.align		5
SYM_FUNC_START(crc32_be)
alternative_if_not ARM64_HAS_CRC32
	b		crc32_be_base		// no CRC32 insns: use fallback
alternative_else_nop_endif
	__crc32		be=1
SYM_FUNC_END(crc32_be)
161 | |