1/* Optimized strncmp implementation using basic LoongArch instructions.
2 Copyright (C) 2023-2024 Free Software Foundation, Inc.
3
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21#include <sys/regdef.h>
22#include <sys/asm.h>
23
/* Select the symbol name this file defines: __strncmp_aligned when
   IS_IN (libc) is true, plain strncmp otherwise.
   NOTE(review): the __strncmp_aligned name suggests this is one candidate
   of a multiarch selector -- confirm against the ifunc list.  */
24#if IS_IN (libc)
25# define STRNCMP __strncmp_aligned
26#else
27# define STRNCMP strncmp
28#endif
29
/* STRNCMP (a0, a1, a2): compare at most a2 bytes of the strings at a0
   and a1.

   Result (a0): the difference between the first pair of differing bytes,
   each taken as a value 0..255, or 0 when no difference is found before
   a NUL byte or before a2 bytes have been examined.

   The strings are read eight bytes at a time from addresses rounded down
   to a multiple of 8.  The byte at the lowest address is handled as the
   least significant byte of each loaded word: leading bytes are dropped
   with a right shift and ctz.d locates the first byte of interest.

   Constants built at entry:
     a5 = 0x0101010101010101    a6 = 0x8080808080808080
     t7 = -1 (all ones)         t8 = 8 (afterwards the running load offset)
   For a word x, (x - a5) & ~x & a6 is nonzero exactly when x contains a
   zero byte, and its lowest set bit lies in the first such byte.  This
   value is called the "zero-byte marker" below.

   a2 is converted to n - 1 at entry and from then on holds the index of
   the last byte that may be compared, measured from a moving origin.

   Only argument and temporary registers are written and no stack is used.

   NOTE(review): the second LEAF argument (6) is presumably an alignment
   exponent for the entry point -- confirm against the LEAF macro.  */
30LEAF(STRNCMP, 6)
    /* n == 0: nothing to compare, return 0.  */
31 beqz a2, L(ret0)
    /* a5 = 0x01010000 (completed by the ori below).  */
32 lu12i.w a5, 0x01010
    /* a3 = offset of a0 inside its 8-byte word.  */
33 andi a3, a0, 0x7
    /* a5 = 0x01010101.  */
34 ori a5, a5, 0x101
35
    /* a4 = offset of a1 inside its 8-byte word.  */
36 andi a4, a1, 0x7
    /* Copy bits 31..0 of a5 into bits 63..32: a5 = 0x0101010101010101.  */
37 bstrins.d a5, a5, 63, 32
38 li.d t7, -1
39 li.d t8, 8
40
    /* a2 = n - 1: index of the last byte that may be compared.  */
41 addi.d a2, a2, -1
    /* a6 = a5 << 7 = 0x8080808080808080.  */
42 slli.d a6, a5, 7
    /* Different offsets within the word: take the shifting path.  */
43 bne a3, a4, L(unaligned)
    /* Same offset: clear bits 2..0 of both pointers (round down to 8).  */
44 bstrins.d a0, zero, 2, 0
45
46 bstrins.d a1, zero, 2, 0
    /* t0, t1 = first word of each string.  */
47 ld.d t0, a0, 0
48 ld.d t1, a1, 0
    /* t2 = offset in bits.  */
49 slli.d t2, a3, 3
50
51
    /* t5 = 8 - a3: number of string bytes held by the first word.  */
52 sub.d t5, t8, a3
    /* t3 = -1 >> t2: ones in the bit positions that survive the shift.  */
53 srl.d t3, t7, t2
    /* Drop the bytes that precede the start of the strings.  */
54 srl.d t0, t0, t2
55 srl.d t1, t1, t2
56
    /* x | ~t3: fill the vacated high bytes with 0xff in both words, so they
       compare equal and are never taken for a NUL.  */
57 orn t0, t0, t3
58 orn t1, t1, t3
    /* t2 = zero-byte marker of t0 (sub / andn / and).  */
59 sub.d t2, t0, a5
60 andn t3, a6, t0
61
62 and t2, t2, t3
    /* First words differ: resolve at al_end.  a2 is still n - 1, measured
       from the start of the strings, which is byte 0 of t0/t1.  */
63 bne t0, t1, L(al_end)
    /* t4 = 1 when n - 1 < t5, i.e. the length limit ends inside this word.  */
64 sltu t4, a2, t5
    /* Re-base a2 to the start of the next word.  */
65 sub.d a2, a2, t5
66
    /* Aligned loop.  On entry t0 == t1, t2 is the zero-byte marker of t0 and
       t4 says whether the limit ended inside that word.  */
67L(al_loop):
    /* Equal words containing a NUL or the limit: the strings compare equal.  */
68 or t4, t2, t4
69 bnez t4, L(ret0)
    /* Load the next word of each string at offset t8.  */
70 ldx.d t0, a0, t8
71 ldx.d t1, a1, t8
72
73
74 addi.d t8, t8, 8
    /* t4 = 1 when the limit ends inside the word just loaded (a2 < 8).  */
75 sltui t4, a2, 8
    /* Re-base a2 to the word after this one.  */
76 addi.d a2, a2, -8
    /* t2 = zero-byte marker of the new t0.  */
77 sub.d t2, t0, a5
78
79 andn t3, a6, t0
80 and t2, t2, t3
81 beq t0, t1, L(al_loop)
    /* Words differ: undo the re-basing so a2 is relative to this word.  */
82 addi.d a2, a2, 8
83
    /* t0 != t1.  Find the first byte that differs or is NUL and return the
       byte difference, or 0 if that byte lies beyond the limit.  */
84L(al_end):
    /* t2 = zero-byte marker | difference bits; ctz.d gives the position of
       its lowest set bit.  */
85 xor t3, t0, t1
86 or t2, t2, t3
87 ctz.d t2, t2
    /* t4 = index of the byte containing that bit.  */
88 srli.d t4, t2, 3
89
    /* Round the bit position down to a byte boundary.  */
90 bstrins.d t2, zero, 2, 0
    /* Move the selected byte of each word to bits 7..0 and isolate it.  */
91 srl.d t0, t0, t2
92 srl.d t1, t1, t2
93 andi t0, t0, 0xff
94
95
96 andi t1, t1, 0xff
    /* t2 = 1 when the selected byte is past the limit (a2 < t4).  */
97 sltu t2, a2, t4
98 sub.d a0, t0, t1
    /* Force the result to 0 when t2 is nonzero.  */
99 masknez a0, a0, t2
100
101 jr ra
    /* Strings compare equal within the limit.  */
102L(ret0):
103 move a0, zero
104 jr ra
    /* NOTE(review): this nop follows an unconditional jr and carries no
       label, so it is not reached by fall-through; presumably padding that
       keeps the following code aligned -- confirm.  */
105 nop
106
    /* The two pointers have different offsets within their 8-byte words.  */
107L(unaligned):
    /* a7 = 1 when a1's offset is smaller than a0's.  */
108 slt a7, a4, a3
    /* XOR swap: when a7 is set, exchange a0 and a1 so that a0 is always the
       pointer with the smaller offset.  a7 records the swap so not_equal can
       restore the sign of the result.  */
109 xor t0, a0, a1
110 maskeqz t0, t0, a7
111 xor a0, a0, t0
112
113 xor a1, a1, t0
    /* Offsets after the possible swap: a3 (for a0) < a4 (for a1).  */
114 andi a3, a0, 0x7
115 andi a4, a1, 0x7
    /* Round both pointers down to a multiple of 8.  */
116 bstrins.d a0, zero, 2, 0
117
118
119 bstrins.d a1, zero, 2, 0
    /* t4 = first a0 word (kept unshifted for later), t1 = first a1 word.  */
120 ld.d t4, a0, 0
121 ld.d t1, a1, 0
    /* t2, t3 = the two offsets in bits.  */
122 slli.d t2, a3, 3
123
124 slli.d t3, a4, 3
    /* t5 = -1 >> t3: ones in the low 8 - a4 bytes.  */
125 srl.d t5, t7, t3
    /* Drop the bytes preceding each string.  */
126 srl.d t0, t4, t2
127 srl.d t1, t1, t3
128
    /* Force the top a4 bytes of both words to 0xff so that only the first
       8 - a4 bytes of each string take part in this comparison.  */
129 orn t0, t0, t5
130 orn t1, t1, t5
    /* Mismatch within those bytes: a2 is still n - 1 from the string start.  */
131 bne t0, t1, L(not_equal)
    /* t6 = 8 - a4: number of bytes just found equal.  */
132 sub.d t6, t8, a4
133
    /* a4 = 8 * (a3 - a4), a negative bit count.  It is used as a right-shift
       count; srl.d takes only the low 6 bits (per the LoongArch ISA), which
       makes it 64 - 8 * d, where d = a4 - a3 in bytes (1..7).  */
134 sub.d a4, t2, t3
    /* t2 = -1 << t2: zeros in the low a3 bytes.  */
135 sll.d t2, t7, t2
    /* t5 = 8 - a3: number of string bytes in the first a0 word.  */
136 sub.d t5, t8, a3
    /* Set the bytes of the first a0 word that precede the string to 0xff.  */
137 orn t4, t4, t2
138
139
    /* t2 = zero-byte marker of the first a0 word.  */
140 sub.d t2, t4, a5
141 andn t3, a6, t4
    /* t7 = 1 when the limit ends inside the first a0 word (n - 1 < t5).
       The all-ones constant in t7 is not needed after this point.  */
142 sltu t7, a2, t5
143 and t2, t2, t3
144
    /* a3 = -a4 = 8 * d: left-shift count used when merging words.  */
145 sub.d a3, zero, a4
    /* NUL or limit inside the first a0 word: finish at un_end, with
       t6 = 8 - a4, a2 = n - 1 and t8 = 8.  */
146 or t2, t2, t7
147 bnez t2, L(un_end)
    /* t7 = t5 - t6 = d, the byte distance between the two offsets.  */
148 sub.d t7, t5, t6
149
    /* Re-base a2 to the start of the second a0 word.  */
150 sub.d a2, a2, t5
    /* t6 = 8 - d: bytes of the newest a0 word used in each merged word.  */
151 sub.d t6, t8, t7
    /* Unaligned loop.  t4 holds the previous a0 word; a2 is relative to the
       a0 word about to be loaded.  */
152L(un_loop):
    /* t5 = top d bytes of the previous a0 word, moved to the bottom.  */
153 srl.d t5, t4, a4
    /* t4 = next a0 word, t1 = next a1 word, both at offset t8.  */
154 ldx.d t4, a0, t8
155
156 ldx.d t1, a1, t8
157 addi.d t8, t8, 8
    /* t0 = (t4 << 8d) | t5: eight consecutive bytes of the a0 string lined
       up with the a1 word in t1.  */
158 sll.d t0, t4, a3
159 or t0, t0, t5
160
161
162 bne t0, t1, L(loop_not_equal)
    /* t2 = zero-byte marker of the whole new a0 word.  */
163 sub.d t2, t4, a5
164 andn t3, a6, t4
    /* t5 = 1 when the limit ends inside the new a0 word (a2 < 8).  */
165 sltui t5, a2, 8
166
167 and t2, t2, t3
    /* Re-base a2 to the following a0 word.  */
168 addi.d a2, a2, -8
    /* Continue while the new a0 word has neither a NUL nor the limit.  */
169 or t3, t2, t5
170 beqz t3, L(un_loop)
171
    /* Leaving the loop: undo the re-basing so a2 is relative to t4's word.  */
172 addi.d a2, a2, 8
    /* Reached with t0 == t1.  t6 is the number of bytes of t0, counted from
       the origin of a2, that were covered by that comparison.  */
173L(un_end):
    /* t2 = zero-byte marker of t0.  */
174 sub.d t2, t0, a5
175 andn t3, a6, t0
    /* t5 = 1 when the limit ends inside the bytes already found equal.  */
176 sltu t5, a2, t6
177
178 and t2, t2, t3
    /* NUL or limit within the equal bytes: the strings compare equal.  */
179 or t2, t2, t5
180 bnez t2, L(ret0)
    /* Otherwise the remaining top d bytes of the current a0 word must be
       compared with the start of the next a1 word.  */
181 ldx.d t1, a1, t8
182
183
    /* t0 = those d bytes in the low positions; the bytes above them are 0,
       so the zero-byte marker computed at not_equal is never empty.  */
184 srl.d t0, t4, a4
    /* Re-base a2 to byte 0 of the new t0.  */
185 sub.d a2, a2, t6
    /* Resolve the result from t0 (a0 side) and t1 (a1 side); a2 is the index
       of the last permitted byte relative to byte 0 of these words.  */
186L(not_equal):
    /* t2 = zero-byte marker of t0 | (t0 ^ t1); ctz.d gives the position of
       the first differing or NUL byte's lowest flagged bit.  */
187 sub.d t2, t0, a5
188 andn t3, a6, t0
189
190 xor t4, t0, t1
191 and t2, t2, t3
192 or t2, t2, t4
193 ctz.d t2, t2
194
    /* Round the bit position down to a byte boundary; t4 = byte index.  */
195 bstrins.d t2, zero, 2, 0
196 srli.d t4, t2, 3
    /* Isolate the selected byte of each word.  */
197 srl.d t0, t0, t2
198 srl.d t1, t1, t2
199
200 andi t0, t0, 0xff
201 andi t1, t1, 0xff
    /* Compute the difference in both directions.  */
202 sub.d t2, t0, t1
203 sub.d t3, t1, t0
204
205
    /* Pick t0 - t1 when no swap happened (a7 == 0), t1 - t0 when the
       pointers were exchanged at L(unaligned).  */
206 masknez t0, t2, a7
207 maskeqz t1, t3, a7
    /* t2 = 1 when the selected byte is past the limit (a2 < t4).  */
208 sltu t2, a2, t4
209 or a0, t0, t1
210
    /* Force the result to 0 when t2 is nonzero.  */
211 masknez a0, a0, t2
212 jr ra
    /* Mismatch inside the unaligned loop.  The merged word t0 starts d bytes
       (t7) before the a0 word that a2 is relative to, so add d to a2.  */
213L(loop_not_equal):
214 add.d a2, a2, t7
215 b L(not_equal)
216END(STRNCMP)
217
/* libc_hidden_builtin_def is a macro defined outside this file.
   NOTE(review): presumably it provides the hidden, libc-internal
   definition of STRNCMP -- confirm against its definition.  */
218libc_hidden_builtin_def (STRNCMP)
219

source code of glibc/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S