/* Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Bytewise compare two null-terminated strings of length no longer than N.  */

#include <sysdep.h>

	.set noat		/* AT is used explicitly below (PROF prologue);
				   keep the assembler from using it implicitly.  */
	.set noreorder		/* Instruction scheduling is done by hand.  */

/* EV6 only predicts one branch per octaword.  We'll use these to push
   subsequent branches back to the next bundle.  This will generally add
   a fetch+decode cycle to older machines, so skip in that case.  */
#ifdef __alpha_fix__
# define ev6_unop	unop
#else
# define ev6_unop
#endif

	.text
35 | |
/* int strncmp (const char *s1, const char *s2, size_t n)

   Alpha calling convention:
     a0 = s1, a1 = s2, a2 = n; result returned in v0
     (negative / zero / positive, computed bytewise in $wordcmp).

   Strategy: compare 8 bytes at a time with ldq_u loads.  cmpbge against
   zero yields a per-byte mask whose set bits mark null bytes ("eos").
   The count is converted up front into a2 = remaining full s1 words and
   t10 = offset of the last counted byte ("eoc").  Co-aligned strings take
   the $aligned/$a_loop path; otherwise s2 words are assembled from pairs
   of partial words with extql/extqh in the $unaligned/$u_loop path,
   taking care never to read past the end of s2.  */

ENTRY(strncmp)
#ifdef PROF
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#else
	.prologue 0
#endif

	xor	a0, a1, t2	# are s1 and s2 co-aligned?
	beq	a2, $zerolength
	ldq_u	t0, 0(a0)	# load asap to give cache time to catch up
	ldq_u	t1, 0(a1)
	lda	t3, -1		# all-ones mask, source for mskqh/mskql below
	and	t2, 7, t2
	srl	t3, 1, t6	# t6 = LONG_MAX
	and	a0, 7, t4	# find s1 misalignment
	and	a1, 7, t5	# find s2 misalignment
	cmovlt	a2, t6, a2	# bound neg count to LONG_MAX
	addq	a1, a2, a3	# s2+count
	addq	a2, t4, a2	# bias count by s1 misalignment
	and	a2, 7, t10	# ofs of last byte in s1 last word
	srl	a2, 3, a2	# remaining full words in s1 count
	bne	t2, $unaligned

	/* On entry to this basic block:
	   t0 == the first word of s1.
	   t1 == the first word of s2.
	   t3 == -1.  */
$aligned:
	mskqh	t3, a1, t8	# mask off leading garbage
	ornot	t1, t8, t1	# force pre-string bytes non-zero in both words
	ornot	t0, t8, t0
	cmpbge	zero, t1, t7	# bits set iff null found
	beq	a2, $eoc	# check end of count
	bne	t7, $eos
	beq	t10, $ant_loop	# no trailing bytes -> careful-read loop

	/* Aligned compare main loop.
	   On entry to this basic block:
	   t0 == an s1 word.
	   t1 == an s2 word not containing a null.  */

	.align 4
$a_loop:
	xor	t0, t1, t2	# e0 :
	bne	t2, $wordcmp	# .. e1 (zdb)
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :

	subq	a2, 1, a2	# e0 :
	addq	a1, 8, a1	# .. e1 :
	addq	a0, 8, a0	# e0 :
	beq	a2, $eoc	# .. e1 :

	cmpbge	zero, t1, t7	# e0 :
	beq	t7, $a_loop	# .. e1 :

	br	$eos

	/* Alternate aligned compare loop, for when there's no trailing
	   bytes on the count.  We have to avoid reading too much data.  */
	.align 4
$ant_loop:
	xor	t0, t1, t2	# e0 :
	ev6_unop
	ev6_unop
	bne	t2, $wordcmp	# .. e1 (zdb)

	subq	a2, 1, a2	# e0 :
	beq	a2, $zerolength	# .. e1 : count exhausted, strings equal so far
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :

	addq	a1, 8, a1	# e0 :
	addq	a0, 8, a0	# .. e1 :
	cmpbge	zero, t1, t7	# e0 :
	beq	t7, $ant_loop	# .. e1 :

	br	$eos

/* The two strings are not co-aligned.  Align s1 and cope.  */
/* On entry to this basic block:
   t0 == the first word of s1.
   t1 == the first word of s2.
   t3 == -1.
   t4 == misalignment of s1.
   t5 == misalignment of s2.
   t10 == misalignment of s1 end.  */
	.align 4
$unaligned:
	/* If s1 misalignment is larger than s2 misalignment, we need
	   extra startup checks to avoid SEGV.  */
	subq	a1, t4, a1	# adjust s2 for s1 misalignment
	cmpult	t4, t5, t9
	subq	a3, 1, a3	# last byte of s2
	bic	a1, 7, t8
	mskqh	t3, t5, t7	# mask garbage in s2
	subq	a3, t8, a3
	ornot	t1, t7, t7
	srl	a3, 3, a3	# remaining full words in s2 count
	beq	t9, $u_head

	/* Failing that, we need to look for both eos and eoc within the
	   first word of s2.  If we find either, we can continue by
	   pretending that the next word of s2 is all zeros.  */
	lda	t2, 0		# next = zero
	cmpeq	a3, 0, t8	# eoc in the first word of s2?
	cmpbge	zero, t7, t7	# eos in the first word of s2?
	or	t7, t8, t8
	bne	t8, $u_head_nl	# skip the 8(a1) load -- it could fault

	/* We know just enough now to be able to assemble the first
	   full word of s2.  We can still find a zero at the end of it.

	   On entry to this basic block:
	   t0 == first word of s1
	   t1 == first partial word of s2.
	   t3 == -1.
	   t10 == ofs of last byte in s1 last word.
	   t11 == ofs of last byte in s2 last word.  */
$u_head:
	ldq_u	t2, 8(a1)	# load second partial s2 word
	subq	a3, 1, a3
$u_head_nl:
	extql	t1, a1, t1	# create first s2 word
	mskqh	t3, a0, t8
	extqh	t2, a1, t4
	ornot	t0, t8, t0	# kill s1 garbage
	or	t1, t4, t1	# s2 word now complete
	cmpbge	zero, t0, t7	# find eos in first s1 word
	ornot	t1, t8, t1	# kill s2 garbage
	beq	a2, $eoc
	subq	a2, 1, a2
	bne	t7, $eos
	mskql	t3, a1, t8	# mask out s2[1] bits we have seen
	xor	t0, t1, t4	# compare aligned words
	or	t2, t8, t8
	bne	t4, $wordcmp
	cmpbge	zero, t8, t7	# eos in high bits of s2[1]?
	cmpeq	a3, 0, t8	# eoc in s2[1]?
	or	t7, t8, t7
	bne	t7, $u_final

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned words from s2.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t2 == the unshifted low-bits from the next s2 word.
	   t10 == ofs of last byte in s1 last word.
	   t11 == ofs of last byte in s2 last word.  */
	.align 4
$u_loop:
	extql	t2, a1, t3	# e0 :
	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
	ldq_u	t0, 8(a0)	# e0 : load next s1 word
	addq	a1, 8, a1	# .. e1 :

	addq	a0, 8, a0	# e0 :
	subq	a3, 1, a3	# .. e1 :
	extqh	t2, a1, t1	# e0 :
	cmpbge	zero, t0, t7	# .. e1 : eos in current s1 word

	or	t1, t3, t1	# e0 :
	beq	a2, $eoc	# .. e1 : eoc in current s1 word
	subq	a2, 1, a2	# e0 :
	cmpbge	zero, t2, t4	# .. e1 : eos in s2[1]

	xor	t0, t1, t3	# e0 : compare the words
	ev6_unop
	ev6_unop
	bne	t7, $eos	# .. e1 :

	cmpeq	a3, 0, t5	# e0 : eoc in s2[1]
	ev6_unop
	ev6_unop
	bne	t3, $wordcmp	# .. e1 :

	or	t4, t5, t4	# e0 : eos or eoc in s2[1].
	beq	t4, $u_loop	# .. e1 (zdb)

	/* We've found a zero in the low bits of the last s2 word.  Get
	   the next s1 word and align them.  */
	.align 3
$u_final:
	ldq_u	t0, 8(a0)
	extql	t2, a1, t1
	cmpbge	zero, t1, t7
	bne	a2, $eos	# count not exhausted: fall into $eoc otherwise

	/* We've hit end of count.  Zero everything after the count
	   and compare whats left.  */
	.align 3
$eoc:
	mskql	t0, t10, t0	# zero bytes at and beyond ofs t10
	mskql	t1, t10, t1
	cmpbge	zero, t1, t7	# the mskql guarantees t7 != 0, so $eos stops here

	/* We've found a zero somewhere in a word we just read.
	   On entry to this basic block:
	   t0 == s1 word
	   t1 == s2 word
	   t7 == cmpbge mask containing the zero.  */
	.align 3
$eos:
	negq	t7, t6		# create bytemask of valid data:
	and	t6, t7, t8	# ... t8 = lowest set bit of t7 (first null byte)
	subq	t8, 1, t6	# ... t6 = bytes strictly before the null
	or	t6, t8, t7	# ... t7 = bytes up to and including the null
	zapnot	t0, t7, t0	# kill the garbage
	zapnot	t1, t7, t1
	xor	t0, t1, v0	# ... and compare
	beq	v0, $done

	/* Here we have two differing co-aligned words in t0 & t1.
	   Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
	.align 3
$wordcmp:
	cmpbge	t0, t1, t2	# comparison yields bit mask of ge
	cmpbge	t1, t0, t3
	xor	t2, t3, t0	# bits set iff t0/t1 bytes differ
	negq	t0, t1		# clear all but least bit
	and	t0, t1, t0
	lda	v0, -1		# assume s1 < s2 ...
	and	t0, t2, t1	# was bit set in t0 > t1?
	cmovne	t1, 1, v0	# ... flip to 1 if the first differing byte was ge
$done:
	ret

	.align 3
$zerolength:
	clr	v0		# n == 0 (or count exhausted): strings compare equal
	ret

END(strncmp)
libc_hidden_builtin_def (strncmp)
277 | |