1/* Optimized strrchr implementation using LoongArch LSX instructions.
2 Copyright (C) 2023-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#include <sys/regdef.h>
21#include <sys/asm.h>
22
/* Everything up to the matching #endif is assembled only when the
   IS_IN (libc) test holds and __loongarch_soft_float is not defined.  */
23#if IS_IN (libc) && !defined __loongarch_soft_float
24
/* Symbol name used for this LSX variant of strrchr.  */
25#define STRRCHR __strrchr_lsx
26
/* char *strrchr (const char *s, int c) -- find the last byte equal to
   (char) c in the NUL-terminated string s; the terminating NUL itself
   counts as part of the string.

   In:   a0 = s, a1 = c (only the low byte is used, see vreplgr2vr.b).
   Out:  a0 = address of the last match, or 0 if there is none.
   Uses: a2, a3, t0-t3, vr0-vr6 (read through fa0/fa1/fa2/fa5), fcc0.

   Outline:
     1. Load the 32-byte aligned block containing s (the "head").  If it
	holds the terminating NUL, answer from that block alone.
     2. Otherwise walk forward 32 bytes at a time until a block with a
	NUL is found (the "tail"); answer from it if it has a match at
	or before the NUL.
     3. Otherwise walk backward block by block looking for any match,
	finishing with the head block, where bytes before s are ignored.

   NOTE(review): the second LEAF argument (6) is presumably the log2
   entry alignment; LEAF/END come from the included headers -- confirm.  */
27LEAF(STRRCHR, 6)
 /* a2 = original s.  a0 = s with its low five bits cleared, i.e. rounded
    down to a 32-byte boundary.  vr0/vr1 = the two 16-byte halves of the
    head block; they are kept intact until L(find_end) may need them.  */
28 move a2, a0
29 bstrins.d a0, zero, 4, 0
30 vld vr0, a0, 0
31 vld vr1, a0, 16
32
 /* t2 = all ones (comparison constant and seed for the cut-off mask).
    vr4 = c replicated into every byte lane.
    vmsknz.b yields a 16-bit mask with bit i set when byte i is non-zero.  */
33 li.d t2, -1
34 vreplgr2vr.b vr4, a1
35 vmsknz.b vr2, vr0
36 vmsknz.b vr3, vr1
37
 /* Merge the two 16-bit masks into one 32-bit "byte is non-zero" mask in
    t0 (bit i <-> head byte i).  The arithmetic shift by a2 (low five bits
    = offset of s inside the block) drops the bytes that precede s; it
    shifts in copies of bit 31, so the result is -1 exactly when no NUL
    lies between s and the end of the head block.  */
38 vilvl.h vr2, vr3, vr2
39 movfr2gr.s t0, fa2
40 sra.w t0, t0, a2
41 beq t0, t2, L(find_tail)
42
 /* The string ends inside the head block.  Build the 32-bit "byte == c"
    mask for the block.  */
43 vseq.b vr2, vr0, vr4
44 vseq.b vr3, vr1, vr4
45 vmsknz.b vr2, vr2
46 vmsknz.b vr3, vr3
47
48
 /* t1 = match mask, t0 = number of trailing ones in the shifted non-zero
    mask = index of the first NUL relative to s, t3 = -2 (all bits but
    bit 0).  */
49 vilvl.h vr1, vr3, vr2
50 slli.d t3, t2, 1
51 movfr2gr.s t1, fa1
52 cto.w t0, t0
53
 /* Align the match mask to s (logical shift, zeros come in from the top),
    then t3 = -2 << nul_index covers every position strictly after the
    NUL; clearing those bits keeps only matches at or before the NUL (the
    NUL position is kept so that searching for '\0' works).  */
54 srl.w t1, t1, a2
55 sll.d t3, t3, t0
56 addi.d a0, a2, 31
57 andn t1, t1, t3
58
 /* Result = s + 31 - clz (t1) = s + index of the highest surviving match
    bit.  maskeqz turns the result into 0 when t1 is zero (no match).  */
59 clz.w t0, t1
60 sub.d a0, a0, t0
61 maskeqz a0, a0, t1
62 jr ra
63
64 .align 5
 /* No NUL from s to the end of the head block.  a3 = address of the first
    block after the head; the backward scan uses it as its stop marker.  */
65L(find_tail):
66 addi.d a3, a0, 32
 /* Forward scan: load the next 32-byte block and advance a0 to it.  The
    unsigned byte-wise minimum of the two halves has a zero lane iff
    either half has one; loop while fcc0 reports no zero byte.  */
67L(loop):
68 vld vr2, a0, 32
69 vld vr3, a0, 48
70 addi.d a0, a0, 32
71
72 vmin.bu vr5, vr2, vr3
73 vsetanyeqz.b fcc0, vr5
74 bceqz fcc0, L(loop)
 /* a0 now addresses the tail block (the one containing the NUL).  Build
    its non-zero mask (vr5 -> t0) and its match mask (vr2 -> t1).  */
75 vmsknz.b vr5, vr2
76
77 vmsknz.b vr6, vr3
78 vilvl.h vr5, vr6, vr5
79 vseq.b vr2, vr2, vr4
80 vseq.b vr3, vr3, vr4
81
82 vmsknz.b vr2, vr2
83 vmsknz.b vr3, vr3
84 vilvl.h vr2, vr3, vr2
85 movfr2gr.s t0, fa5
86
87
 /* Same cut-off as in the head path, without the shift by a2 because the
    whole block belongs to the string: drop matches located after the
    NUL.  t2 still holds -1 here.  */
88 movfr2gr.s t1, fa2
89 slli.d t3, t2, 1
90 cto.w t0, t0
91 sll.d t3, t3, t0
92
 /* No match in the tail block: search the earlier blocks.  Otherwise
    return the address of the highest match bit; t1 is non-zero on this
    path, so no NULL fix-up is needed.  */
93 andn t1, t1, t3
94 beqz t1, L(find_loop)
95 clz.w t0, t1
96 addi.d a0, a0, 31
97
98 sub.d a0, a0, t0
99 jr ra
 /* Backward scan.  On each entry a0 is a block already known to contain
    no match.  When a0 equals a3 only the head block is left, which needs
    the special handling in L(find_end).  Otherwise step back one block
    and compare it against c.  */
100L(find_loop):
101 beq a0, a3, L(find_end)
102 vld vr2, a0, -32
103
104 vld vr3, a0, -16
105 addi.d a0, a0, -32
106 vseq.b vr2, vr2, vr4
107 vseq.b vr3, vr3, vr4
108
109
 /* The byte-wise maximum of the two compare results is all zero iff
    neither half matched; keep stepping back in that case.  */
110 vmax.bu vr5, vr2, vr3
111 vseteqz.v fcc0, vr5
112 bcnez fcc0, L(find_loop)
 /* At least one match in this block, so t0 below is non-zero.  vr0/vr1
    (the saved head block) may be overwritten here because this path
    returns without reaching L(find_end).  */
113 vmsknz.b vr0, vr2
114
115 vmsknz.b vr1, vr3
116 vilvl.h vr0, vr1, vr0
117 movfr2gr.s t0, fa0
118 addi.d a0, a0, 31
119
 /* Result = block + 31 - clz (mask) = address of the last match.  */
120 clz.w t0, t0
121 sub.d a0, a0, t0
122 jr ra
 /* Padding instruction after the return; presumably keeps the following
    label on the intended alignment -- confirm.  */
123 nop
124
 /* Only the head block (still in vr0/vr1) can contain a match.  The whole
    remainder of the block belongs to the string, so no NUL cut-off is
    needed; only the bytes before s are discarded by the shift by a2.  */
125L(find_end):
126 vseq.b vr2, vr0, vr4
127 vseq.b vr3, vr1, vr4
128 vmsknz.b vr2, vr2
129 vmsknz.b vr3, vr3
130
131
132 vilvl.h vr1, vr3, vr2
133 movfr2gr.s t1, fa1
134 addi.d a0, a2, 31
135 srl.w t1, t1, a2
136
 /* Result = s + index of the highest match bit, or 0 when the shifted
    mask is empty (c does not occur in the string).  */
137 clz.w t0, t1
138 sub.d a0, a0, t0
139 maskeqz a0, a0, t1
140 jr ra
141END(STRRCHR)
142
/* NOTE(review): libc_hidden_builtin_def is defined outside this file;
   presumably it provides the hidden internal alias for STRRCHR -- confirm
   against the glibc symbol-visibility macros.  */
143libc_hidden_builtin_def(STRRCHR)
144#endif
145

source code of glibc/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S