/* Optimized memchr implementation using LoongArch LSX instructions.
   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
18
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
22
#if IS_IN (libc) && !defined __loongarch_soft_float

# define MEMCHR __memchr_lsx

/* void *MEMCHR (const void *s, int c, size_t n)

   Return the address of the first byte in [s, s + n) that equals the
   low byte of c, or 0 if there is none.  The buffer is scanned one
   32-byte aligned block (two 16-byte LSX vectors) at a time.

   In:   a0 = s, a1 = c, a2 = n
   Out:  a0 = address of the first match, or 0
   Uses: a3, a4, t0-t3, vr0-vr3, fcc0 (the low word of vr0 is read
         back through fa0)

   Register roles once set up:
     a0   base of the current 32-byte aligned block
     a3   s + n, one past the last byte to examine
     a4   base of the block that holds byte s + n - 1
     vr2  c replicated into all 16 byte lanes
     t0   match mask; bit i is set when byte a0 + i equals c

   Only whole aligned blocks that contain at least one byte of the
   buffer are loaded (as long as s + n does not wrap); mask bits for
   bytes before s or at/after s + n are cleared before they are used.

   NOTE(review): the instructions appear to be hand-scheduled in groups
   of four; keep the order when editing unless re-measured.  */
LEAF(MEMCHR, 6)
    beqz            a2, L(ret0)         /* n == 0: no byte to examine.  */
    add.d           a3, a0, a2          /* a3 = s + n.  */
    andi            t0, a0, 0x1f        /* t0 = s % 32.  */
    bstrins.d       a0, zero, 4, 0      /* a0 = s rounded down to 32.  */

    vld             vr0, a0, 0          /* Bytes 0-15 of the first block.  */
    vld             vr1, a0, 16         /* Bytes 16-31 of the first block.  */
    li.d            t1, -1              /* All ones, source of the masks.  */
    li.d            t2, 32

    vreplgr2vr.b    vr2, a1             /* vr2 = c in every byte lane.  */
    sll.d           t3, t1, t0          /* t3 = bits for bytes >= s.  */
    sub.d           t2, t2, t0          /* t2 = bytes from s to block end.  */
    vseq.b          vr0, vr0, vr2       /* Matching lanes become 0xff.  */

    vseq.b          vr1, vr1, vr2
    vmsknz.b        vr0, vr0            /* One bit per non-zero lane.  */
    vmsknz.b        vr1, vr1
    vilvl.h         vr0, vr1, vr0       /* Low word = vr1 mask : vr0 mask.  */

    movfr2gr.s      t0, fa0             /* t0 = 32-bit match mask.  */
    and             t0, t0, t3          /* Ignore bytes before s.  */
    bgeu            t2, a2, L(end)      /* Buffer ends inside this block.  */
    bnez            t0, L(found)

    addi.d          a4, a3, -1
    bstrins.d       a4, zero, 4, 0      /* a4 = (s + n - 1) & ~31.  */
L(loop):
    vld             vr0, a0, 32         /* Load the next block.  */
    vld             vr1, a0, 48

    addi.d          a0, a0, 32          /* a0 = base of that block.  */
    vseq.b          vr0, vr0, vr2
    vseq.b          vr1, vr1, vr2
    beq             a0, a4, L(out)      /* Last block: needs the end mask.  */

    vmax.bu         vr3, vr0, vr1       /* Non-zero iff some lane matched.  */
    vseteqz.v       fcc0, vr3           /* fcc0 = (vr3 is all zero).  */
    bcnez           fcc0, L(loop)       /* No match here, keep scanning.  */
    vmsknz.b        vr0, vr0

    vmsknz.b        vr1, vr1
    vilvl.h         vr0, vr1, vr0
    movfr2gr.s      t0, fa0             /* Non-zero match mask.  */
L(found):
    /* t0 != 0 here and every set bit is a byte inside the buffer.  */
    ctz.w           t0, t0              /* Index of the first match.  */

    add.d           a0, a0, t0
    jr              ra
L(ret0):
    move            a0, zero
    jr              ra

L(out):
    vmsknz.b        vr0, vr0
    vmsknz.b        vr1, vr1
    vilvl.h         vr0, vr1, vr0
    movfr2gr.s      t0, fa0             /* t0 = mask for the last block.  */

L(end):
    /* The buffer ends in the block at a0.  srl.w shifts by the low five
       bits of -(s + n), i.e. (32 - (s + n) % 32) % 32, so t1 keeps only
       the bits of bytes below s + n (all 32 if s + n is 32-aligned).  */
    sub.d           t2, zero, a3
    srl.w           t1, t1, t2
    and             t0, t0, t1          /* Ignore bytes at/after s + n.  */
    ctz.w           t1, t0              /* Index of the first match.  */

    add.d           a0, a0, t1
    maskeqz         a0, a0, t0          /* a0 = 0 when no bit is left.  */
    jr              ra
END(MEMCHR)

libc_hidden_builtin_def (MEMCHR)
#endif
/* Source: glibc/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S  */