/*
 * memchr - scan memory for a character
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This __memchr_arm routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.  It has a fast path for short sizes, and an
   optimised path for large data sets; the worst case is a match found
   early in a large data set.

 */
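/*
   For orientation only: a rough C sketch of the same strategy (byte loop up
   to an 8-byte boundary, then two words at a time, then a byte loop for the
   tail).  This is an illustration, not the code that is assembled below; the
   helper name memchr_sketch is made up for this comment, and a portable
   zero-byte bit test stands in for the EOR + UADD8 + SEL sequence used in
   the assembly.

   #include <stddef.h>
   #include <stdint.h>
   #include <string.h>

   void *memchr_sketch(const void *s, int c, size_t n)
   {
       const unsigned char *p = s;
       const unsigned char target = (unsigned char) c;

       if (n >= 16) {
           // Step up to an 8-byte boundary (at most 7 iterations, so n
           // cannot run out here because n >= 16).
           while (((uintptr_t) p & 7) != 0) {
               if (*p == target)
                   return (void *) p;
               p++;
               n--;
           }
           // Replicate the target byte into every byte of a word, then scan
           // two 32-bit words per iteration.  XOR turns matching bytes into
           // 0x00; the bit trick below detects any 0x00 byte.
           const uint32_t rep = target * 0x01010101u;
           while (n >= 8) {
               uint32_t w0, w1;
               memcpy(&w0, p, 4);
               memcpy(&w1, p + 4, 4);
               w0 ^= rep;
               w1 ^= rep;
               if (((w0 - 0x01010101u) & ~w0 & 0x80808080u) != 0 ||
                   ((w1 - 0x01010101u) & ~w1 & 0x80808080u) != 0)
                   break;          // a match lies somewhere in these 8 bytes
               p += 8;
               n -= 8;
           }
       }
       // Short inputs and the final few bytes: plain byte loop.
       while (n--) {
           if (*p == target)
               return (void *) p;
           p++;
       }
       return NULL;
   }
 */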

@ 2011-02-07 david.gilbert@linaro.org
@    Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@    Import endianness fix from local git ea786f1b
@ 2011-12-07 david.gilbert@linaro.org
@    Removed unneeded cbz from align loop

        .syntax unified
        .arch armv7-a

@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
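@ Worked example: after the SEL trick below, every byte of the flag word is
@ either 0x00 or 0xff, so one bit per byte is enough to test.  CHARTSTMASK(1)
@ is 1<<8 on little-endian and 1<<23 on big-endian - in both cases a bit that
@ lies inside the byte holding the 2nd character - so the same test works for
@ either endianness.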
        .text
        .thumb

@ ---------------------------------------------------------------------------
        .thumb_func
        .align 2
        .p2align 4,,15
        .global __memchr_arm
        .type __memchr_arm,%function
__memchr_arm:
        @ r0 = start of memory to scan
        @ r1 = character to look for
        @ r2 = length
        @ returns r0 = pointer to character or NULL if not found
        and     r1,r1,#0xff     @ Don't think we can trust the caller to actually pass a char

        cmp     r2,#16          @ If it's short don't bother with anything clever
        blt     20f

        tst     r0, #7          @ If it's already aligned skip the next bit
        beq     10f

        @ Work up to an aligned point
5:
        ldrb    r3, [r0],#1
        subs    r2, r2, #1
        cmp     r3, r1
        beq     50f             @ If it matches exit found
        tst     r0, #7
        bne     5b              @ If not aligned yet then do next byte

10:
        @ At this point, we are aligned, we know we have at least 8 bytes to work with
        push    {r4,r5,r6,r7}
        orr     r1, r1, r1, lsl #8      @ expand the match character across all 4 bytes of the word
        orr     r1, r1, r1, lsl #16
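        @ Worked example: searching for 'A' (0x41), r1 goes
        @ 0x00000041 -> 0x00004141 -> 0x41414141, so an EOR with r1 turns
        @ every byte that equals the target into 0x00.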
        bic     r4, r2, #7      @ Number of bytes in whole double words (a multiple of 8)
        mvns    r7, #0          @ all F's
        movs    r3, #0          @ all 0's - r3/r7 are the two sources SEL picks between

15:
        ldmia   r0!,{r5,r6}
        subs    r4, r4, #8
        eor     r5,r5, r1       @ Get it so that r5,r6 have 00's where the bytes match the target
        eor     r6,r6, r1
        uadd8   r5, r5, r7      @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
        sel     r5, r3, r7      @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
        uadd8   r6, r6, r7      @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
        sel     r6, r5, r7      @ chained.... bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
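        @ Worked example (little-endian, target 0x41): if the first loaded
        @ word is 0x12411234, EOR with 0x41414141 gives 0x53005375; UADD8
        @ with 0xffffffff carries out of every non-zero byte, so the GE bits
        @ are 1,0,1,1 and SEL produces 0x00ff0000 - the single 0xff flags the
        @ byte (3rd character) that matched.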
        cbnz    r6, 60f
        bne     15b             @ (Flags from the subs above) If not run out of bytes then go around again

        pop     {r4,r5,r6,r7}
        and     r1,r1,#0xff     @ Get r1 back to a single character from the expansion above
        and     r2,r2,#7        @ Count left over after the whole double words have been done
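        @ Worked example: with r2 = 27 the loop above consumed
        @ bic(27,7) = 24 bytes (three double words), and 27 & 7 = 3 bytes
        @ remain for the byte loop below.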

20:
        cbz     r2, 40f         @ Zero length (or nothing left after the loop above) - not found

21:     @ Post aligned section, or just a short call
        ldrb    r3,[r0],#1
        subs    r2,r2,#1
        eor     r3,r3,r1        @ r3 = 0 if match - doesn't break flags from sub
        cbz     r3, 50f
        bne     21b             @ on r2 flags

40:
        movs    r0,#0           @ not found
        bx      lr

50:
        subs    r0,r0,#1        @ found
        bx      lr

60:     @ We're here because the fast path found a hit - now we have to track down exactly which word it was
        @ r0 points to the start of the double word after the one that was tested
        @ r5 has the 00/ff pattern for the first word, r6 has the chained value
        cmp     r5, #0
        itte    eq
        moveq   r5, r6          @ the match is in the 2nd word
        subeq   r0,r0,#3        @ Points to 2nd byte of 2nd word
        subne   r0,r0,#7        @ or 2nd byte of 1st word
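        @ Worked example: if the pair of words started at address A, the
        @ LDMIA left r0 = A+8; a hit in the 1st word gives r0 = A+1 and a hit
        @ in the 2nd word gives r0 = A+5, i.e. the 2nd byte of whichever word
        @ r5 now describes.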

        @ r0 currently points to the 2nd byte of the word containing the hit
        tst     r5, # CHARTSTMASK(0)    @ 1st character
        bne     61f
        adds    r0,r0,#1
        tst     r5, # CHARTSTMASK(1)    @ 2nd character
        ittt    eq
        addeq   r0,r0,#1
        tsteq   r5, # (3<<15)           @ 2nd & 3rd character
        @ If not the 3rd, it must be the last one
        addeq   r0,r0,#1
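        @ Note on (3<<15): it covers bit 16 (CHARTSTMASK(2) on little-endian)
        @ and bit 15 (CHARTSTMASK(2) on big-endian).  The extra bit falls in
        @ a byte already known to be 00 at this point, so the one constant
        @ tests the 3rd character on either endianness.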

61:
        pop     {r4,r5,r6,r7}
        subs    r0,r0,#1
        bx      lr

        .size __memchr_arm, . - __memchr_arm