1/* Copyright (C) 1996-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library. If not, see
16 <https://www.gnu.org/licenses/>. */
17
18/* Bytewise compare two null-terminated strings of length no longer than N. */
19
20#include <sysdep.h>
21
	.set noat		# AT ($28) is used explicitly by the PROF stub below
	.set noreorder		# code is hand-scheduled; forbid assembler reordering

/* EV6 only predicts one branch per octaword.  We'll use these to push
   subsequent branches back to the next bundle.  This will generally add
   a fetch+decode cycle to older machines, so skip in that case.  */
#ifdef __alpha_fix__
# define ev6_unop	unop
#else
# define ev6_unop
#endif

	.text
35
/* int strncmp (const char *s1, const char *s2, size_t n)

   Compare at most N bytes of the NUL-terminated strings S1 and S2.
   Returns <0, 0, or >0 as S1 is less than, equal to, or greater than S2
   (byte comparison is unsigned, per the C standard).

   Register roles (OSF/1 Alpha calling convention):
     a0 = s1,  a1 = s2,  a2 = n;  result in v0.
     a3     = bound/word count derived from s2+n (unaligned path).
     t0/t1  = current 8-byte word of s1 / s2.
     t3     = -1 (all-ones), source for mskqh/mskql masks.
     t7     = cmpbge null-byte mask.
     t10    = offset of the last counted byte within s1's last word.
   All words are fetched with ldq_u, so no alignment trap can occur;
   the care taken below is to never *dereference past* s2's last page.  */
ENTRY(strncmp)
#ifdef PROF
	ldgp	gp, 0(pv)		# profiling build: set up GP, call _mcount
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#else
	.prologue 0
#endif

	xor	a0, a1, t2	# are s1 and s2 co-aligned?
	beq	a2, $zerolength	# n == 0: strings compare equal
	ldq_u	t0, 0(a0)	# load asap to give cache time to catch up
	ldq_u	t1, 0(a1)
	lda	t3, -1		# t3 = all-ones mask used throughout
	and	t2, 7, t2	# t2 = low bits of s1^s2; zero iff co-aligned
	srl	t3, 1, t6	# t6 = LONG_MAX
	and	a0, 7, t4	# find s1 misalignment
	and	a1, 7, t5	# find s2 misalignment
	cmovlt	a2, t6, a2	# bound neg count to LONG_MAX
	addq	a1, a2, a3	# s2+count
	addq	a2, t4, a2	# bias count by s1 misalignment
	and	a2, 7, t10	# ofs of last byte in s1 last word
	srl	a2, 3, a2	# remaining full words in s1 count
	bne	t2, $unaligned	# different alignments need the slow path

	/* On entry to this basic block:
	   t0 == the first word of s1.
	   t1 == the first word of s2.
	   t3 == -1.  */
$aligned:
	mskqh	t3, a1, t8	# mask off leading garbage
	ornot	t1, t8, t1	# force pre-string bytes to 0xff (non-zero, equal)
	ornot	t0, t8, t0	# ... in both words, so they compare equal
	cmpbge	zero, t1, t7	# bits set iff null found
	beq	a2, $eoc	# check end of count
	bne	t7, $eos
	beq	t10, $ant_loop	# count is an exact word multiple: no-overread loop

	/* Aligned compare main loop.
	   On entry to this basic block:
	   t0 == an s1 word.
	   t1 == an s2 word not containing a null.
	   Safe to read one word ahead: the trailing-byte count t10 is
	   nonzero, so the next word is within the counted range.  */

	.align 4
$a_loop:
	xor	t0, t1, t2	# e0 : words differ?
	bne	t2, $wordcmp	# .. e1 (zdb)
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :

	subq	a2, 1, a2	# e0 :
	addq	a1, 8, a1	# .. e1 :
	addq	a0, 8, a0	# e0 :
	beq	a2, $eoc	# .. e1 :

	cmpbge	zero, t1, t7	# e0 : null byte in new s2 word?
	beq	t7, $a_loop	# .. e1 :

	br	$eos

	/* Alternate aligned compare loop, for when there's no trailing
	   bytes on the count.  We have to avoid reading too much data.  */
	.align 4
$ant_loop:
	xor	t0, t1, t2	# e0 : words differ?
	ev6_unop
	ev6_unop
	bne	t2, $wordcmp	# .. e1 (zdb)

	subq	a2, 1, a2	# e0 :
	beq	a2, $zerolength	# .. e1 : count exhausted, all bytes equal
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :

	addq	a1, 8, a1	# e0 :
	addq	a0, 8, a0	# .. e1 :
	cmpbge	zero, t1, t7	# e0 : null byte in new s2 word?
	beq	t7, $ant_loop	# .. e1 :

	br	$eos

	/* The two strings are not co-aligned.  Align s1 and cope.  */
	/* On entry to this basic block:
	   t0 == the first word of s1.
	   t1 == the first word of s2.
	   t3 == -1.
	   t4 == misalignment of s1.
	   t5 == misalignment of s2.
	   t10 == misalignment of s1 end.  */
	.align 4
$unaligned:
	/* If s1 misalignment is larger than s2 misalignment, we need
	   extra startup checks to avoid SEGV.  */
	subq	a1, t4, a1	# adjust s2 for s1 misalignment
	cmpult	t4, t5, t9	# t9 != 0: biased s2 starts before its own word
	subq	a3, 1, a3	# last byte of s2
	bic	a1, 7, t8	# t8 = word containing (biased) s2 start
	mskqh	t3, t5, t7	# mask garbage in s2
	subq	a3, t8, a3
	ornot	t1, t7, t7	# pre-string s2 bytes forced non-zero
	srl	a3, 3, a3	# remaining full words in s2 count
	beq	t9, $u_head

	/* Failing that, we need to look for both eos and eoc within the
	   first word of s2.  If we find either, we can continue by
	   pretending that the next word of s2 is all zeros.  */
	lda	t2, 0		# next = zero
	cmpeq	a3, 0, t8	# eoc in the first word of s2?
	cmpbge	zero, t7, t7	# eos in the first word of s2?
	or	t7, t8, t8
	bne	t8, $u_head_nl	# either found: skip the second s2 load

	/* We know just enough now to be able to assemble the first
	   full word of s2.  We can still find a zero at the end of it.

	   On entry to this basic block:
	   t0 == first word of s1
	   t1 == first partial word of s2.
	   t3 == -1.
	   t10 == ofs of last byte in s1 last word.
	   NOTE(review): the original comments here and at $u_loop also
	   claim "t11 == ofs of last byte in s2 last word", but t11 is
	   never written anywhere in this function -- the s2 end offset
	   appears to be tracked only via the word count in a3; confirm.  */
$u_head:
	ldq_u	t2, 8(a1)	# load second partial s2 word
	subq	a3, 1, a3
$u_head_nl:
	extql	t1, a1, t1	# create first s2 word
	mskqh	t3, a0, t8
	extqh	t2, a1, t4
	ornot	t0, t8, t0	# kill s1 garbage
	or	t1, t4, t1	# s2 word now complete
	cmpbge	zero, t0, t7	# find eos in first s1 word
	ornot	t1, t8, t1	# kill s2 garbage
	beq	a2, $eoc
	subq	a2, 1, a2
	bne	t7, $eos
	mskql	t3, a1, t8	# mask out s2[1] bits we have seen
	xor	t0, t1, t4	# compare aligned words
	or	t2, t8, t8
	bne	t4, $wordcmp
	cmpbge	zero, t8, t7	# eos in high bits of s2[1]?
	cmpeq	a3, 0, t8	# eoc in s2[1]?
	or	t7, t8, t7
	bne	t7, $u_final

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned words from s2.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t2 == the unshifted low-bits from the next s2 word.
	   t10 == ofs of last byte in s1 last word.
	   (See NOTE(review) above regarding the stale t11 reference.)  */
	.align 4
$u_loop:
	extql	t2, a1, t3	# e0 :
	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
	ldq_u	t0, 8(a0)	# e0 : load next s1 word
	addq	a1, 8, a1	# .. e1 :

	addq	a0, 8, a0	# e0 :
	subq	a3, 1, a3	# .. e1 :
	extqh	t2, a1, t1	# e0 :
	cmpbge	zero, t0, t7	# .. e1 : eos in current s1 word

	or	t1, t3, t1	# e0 :
	beq	a2, $eoc	# .. e1 : eoc in current s1 word
	subq	a2, 1, a2	# e0 :
	cmpbge	zero, t2, t4	# .. e1 : eos in s2[1]

	xor	t0, t1, t3	# e0 : compare the words
	ev6_unop
	ev6_unop
	bne	t7, $eos	# .. e1 :

	cmpeq	a3, 0, t5	# e0 : eoc in s2[1]
	ev6_unop
	ev6_unop
	bne	t3, $wordcmp	# .. e1 :

	or	t4, t5, t4	# e0 : eos or eoc in s2[1].
	beq	t4, $u_loop	# .. e1 (zdb)

	/* We've found a zero in the low bits of the last s2 word.  Get
	   the next s1 word and align them.  */
	.align 3
$u_final:
	ldq_u	t0, 8(a0)
	extql	t2, a1, t1	# shift the already-loaded s2 bytes into place
	cmpbge	zero, t1, t7	# null position for the $eos logic below
	bne	a2, $eos	# count not yet exhausted: compare normally

	/* We've hit end of count.  Zero everything after the count
	   and compare whats left.  */
	.align 3
$eoc:
	mskql	t0, t10, t0	# discard s1 bytes past the count
	mskql	t1, t10, t1	# ... and the corresponding s2 bytes
	cmpbge	zero, t1, t7	# recompute null mask on the truncated word

	/* We've found a zero somewhere in a word we just read.
	   On entry to this basic block:
	   t0 == s1 word
	   t1 == s2 word
	   t7 == cmpbge mask containing the zero.  */
	.align 3
$eos:
	negq	t7, t6		# create bytemask of valid data
	and	t6, t7, t8	# t8 = lowest set bit of t7 = first null byte
	subq	t8, 1, t6	# t6 = bytes strictly before the null
	or	t6, t8, t7	# t7 = bytes up to and including the null
	zapnot	t0, t7, t0	# kill the garbage
	zapnot	t1, t7, t1
	xor	t0, t1, v0	# ... and compare
	beq	v0, $done	# equal through the null/count: return 0

	/* Here we have two differing co-aligned words in t0 & t1.
	   Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
	.align 3
$wordcmp:
	cmpbge	t0, t1, t2	# comparison yields bit mask of ge
	cmpbge	t1, t0, t3
	xor	t2, t3, t0	# bits set iff t0/t1 bytes differ
	negq	t0, t1		# clear all but least bit
	and	t0, t1, t0	# t0 = first (lowest-addressed) differing byte
	lda	v0, -1		# assume s1 < s2
	and	t0, t2, t1	# was bit set in t0 > t1?
	cmovne	t1, 1, v0	# yes: s1 > s2, return +1
$done:
	ret

	.align 3
$zerolength:
	clr	v0		# n == 0 (or count ran out): strings equal
	ret

	END(strncmp)
libc_hidden_builtin_def (strncmp)
277
/* source code of glibc/sysdeps/alpha/strncmp.S */