strnlen.S source code [linux/arch/arm64/lib/strnlen.S]

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro. The original code can be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */
12
13	#include <linux/linkage.h>
14	#include <asm/assembler.h>
15
/*
 * Determine the length of a string, examining at most a fixed number of bytes.
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - the length of the string, or the maximal string length if no NUL
 *	     terminator is found within that many bytes
 */
25
/*
 * Symbolic register names used by the string-length routine in this file.
 * Every alias maps to a register in the x0-x14 range.
 */

/* Arguments and results. */
srcin		.req	x0	/* in:  pointer to the string */
len		.req	x0	/* out: resulting length (shares x0 with srcin) */
limit		.req	x1	/* in:  maximum number of bytes to examine */

/* Locals and temporaries. */
src		.req	x2	/* running, 16-byte-aligned read pointer */
data1		.req	x3	/* first Dword of the current Qword */
data2		.req	x4	/* second Dword of the current Qword */
data2a		.req	x5	/* masked copy of data2 (misaligned start only) */
has_nul1	.req	x6	/* NUL syndrome computed from data1 */
has_nul2	.req	x7	/* NUL syndrome computed from data2 */
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
tmp4		.req	x11
zeroones	.req	x12	/* holds REP8_01 */
pos		.req	x13	/* bit position of the first NUL in its Dword */
limit_wd	.req	x14	/* number of Qwords left to examine, minus one */

/* Byte patterns replicated across all eight bytes of a 64-bit register. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
49
/*
 * __pi_strnlen - length of a string, scanning at most 'limit' bytes.
 *
 * In:	srcin (x0) - pointer to the string
 *	limit (x1) - maximum number of bytes to examine
 * Out:	len   (x0) - offset of the first NUL byte, or 'limit' when no NUL
 *		     is found within the first 'limit' bytes
 *
 * Data is fetched 16 bytes at a time from 16-byte-aligned addresses,
 * starting at srcin rounded down to a multiple of 16. Loads may therefore
 * cover bytes that precede the string or follow the terminator/limit inside
 * the same aligned Qword; such bytes never influence the result.
 *
 * Writes x0 and x2-x14 and the condition flags. Does not touch the stack.
 */
SYM_FUNC_START(__pi_strnlen)
	cbz	limit, .Lhit_limit	/* limit == 0: result is 0. */
	mov	zeroones, #REP8_01
	bic	src, srcin, #15		/* src = srcin rounded down to 16. */
	ands	tmp1, srcin, #15	/* tmp1 = misalignment in bytes. */
	b.ne	.Lmisaligned
	/* Calculate the number of full and partial words -1. */
	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow. */
	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords. */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
	/*
	 * The inner loop deals with two Dwords at a time. This has a
	 * slightly higher start-up cost, but we should win quite quickly,
	 * especially on cores with a high number of issue slots per
	 * cycle, as we get much better parallelism out of the operations.
	 */
.Lloop:
	ldp	data1, data2, [src], #16
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	subs	limit_wd, limit_wd, #1
	orr	tmp1, has_nul1, has_nul2
	/*
	 * While the Qword count is still non-negative (pl), compare the
	 * combined syndrome with zero; once it has gone negative, force
	 * NZCV = 0000 (i.e. "ne") so that the loop terminates.
	 */
	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000 */
	b.eq	.Lloop

	cbz	tmp1, .Lhit_limit	/* No null in final Qword. */

	/*
	 * We know there's a null in the final Qword. The easiest thing
	 * to do now is work out the length of the string and return
	 * MIN (len, limit).
	 */
	sub	len, src, srcin		/* len aliases srcin: srcin is dead from here. */
	cbz	has_nul1, .Lnul_in_data2
CPU_BE(	mov	data2, data1	)	/* Prepare data to re-calculate the syndrome. */

	sub	len, len, #8		/* NUL is in the first Dword. */
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	 * For big-endian, carry propagation (if the final byte in the
	 * string is 0x01) means we cannot use has_nul directly. The
	 * easiest way to get the correct byte is to byte-swap the data
	 * and calculate the syndrome a second time.
	 */
CPU_BE(	rev	data2, data2	)
CPU_BE(	sub	tmp1, data2, zeroones	)
CPU_BE(	orr	tmp2, data2, #REP8_7f	)
CPU_BE(	bic	has_nul2, tmp1, tmp2	)

	sub	len, len, #8		/* len = offset of the Dword holding the NUL. */
	rev	has_nul2, has_nul2
	clz	pos, has_nul2
	add	len, len, pos, lsr #3	/* Bits to bytes. */
	cmp	len, limit
	csel	len, len, limit, ls	/* Return the lower value. */
	ret

.Lmisaligned:
	/*
	 * Deal with a partial first word.
	 * We're doing two things in parallel here;
	 * 1) Calculate the number of words (but avoiding overflow if
	 * limit is near ULONG_MAX) - to do this we need to work out
	 * limit + tmp1 - 1 as a 65-bit value before shifting it;
	 * 2) Load and mask the initial data words - we force the bytes
	 * before the ones we are interested in to 0xff - this ensures
	 * early bytes will not hit any zero detection.
	 */
	ldp	data1, data2, [src], #16

	sub	limit_wd, limit, #1
	and	tmp3, limit_wd, #15
	lsr	limit_wd, limit_wd, #4

	add	tmp3, tmp3, tmp1
	add	limit_wd, limit_wd, tmp3, lsr #4

	neg	tmp4, tmp1
	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits. */

	mov	tmp2, #~0
	/* Big-endian. Early bytes are at MSB. */
CPU_BE(	lsl	tmp2, tmp2, tmp4	)	/* Shift by (tmp4 & 63). */
	/* Little-endian. Early bytes are at LSB. */
CPU_LE(	lsr	tmp2, tmp2, tmp4	)	/* Shift by (tmp4 & 63). */

	cmp	tmp1, #8

	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2

	/*
	 * Misalignment <= 8: only data1 holds leading bytes, so keep the
	 * masked data1 and the untouched data2. Misalignment > 8: all of
	 * data1 precedes the string (force it to all-ones) and data2 needs
	 * the mask.
	 */
	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned

.Lhit_limit:
	mov	len, limit		/* No NUL within the limit: return limit. */
	ret
SYM_FUNC_END(__pi_strnlen)

SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)
163

source code of linux/arch/arm64/lib/strnlen.S