/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

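/*
 * L() prefixes each label with ".L", the assembler's local-label
 * convention, so the branch targets below stay out of the object
 * file's symbol table.
 */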
#define L(label) .L ## label

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

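/*
 * For orientation, the routine below matches this plain C model up to
 * the sign conventions of memcmp (an illustrative sketch added here,
 * not part of the upstream source; memcmp_ref is a made-up name):
 *
 *	int memcmp_ref(const void *s1, const void *s2, size_t n)
 *	{
 *		const unsigned char *p1 = s1, *p2 = s2;
 *
 *		while (n--) {
 *			if (*p1 != *p2)
 *				return *p1 < *p2 ? -1 : 1;
 *			p1++, p2++;
 *		}
 *		return 0;
 *	}
 *
 * The assembly reaches the same answer 8 or 16 bytes at a time and
 * only falls back to byte-sized steps for very short inputs.
 */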
SYM_FUNC_START(__pi_memcmp)
	subs	limit, limit, 8
	b.lo	L(less8)

	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8
	b.gt	L(more16)

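	/*
	 * 8 to 16 bytes in total: limit is now n - 16, i.e. in [-8, 0],
	 * so these loads pick up the final 8 bytes of each buffer.  They
	 * may overlap bytes already compared above, but those are known
	 * equal, so rechecking them is harmless.
	 */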
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	bne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1
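	/*
	 * src1 is now 16-byte aligned and src2 keeps the same relative
	 * misalignment.  Stepping back re-covers 0-15 already-compared
	 * bytes; limit was grown by tmp1 to match, and re-comparing
	 * equal bytes cannot change the result.
	 */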

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
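	/*
	 * The two CCMPs fuse the three loop-exit tests into one branch:
	 * SUBS sets the flags from limit; the first CCMP compares the
	 * low words only while the decremented limit is still above
	 * zero (HI), otherwise forcing NZCV to 0 (NE); the second
	 * compares the high words only if the low words matched.  b.eq
	 * therefore loops only while the data is equal and bytes remain.
	 */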
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

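	/*
	 * The loop exited either because the data differed or because
	 * limit ran out.  Recheck both 8-byte halves: the first
	 * mismatch, if any, routes to L(return); if all 16 bytes
	 * matched, fall through and compare the final bytes.
	 */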
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	bne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  */
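	/*
	 * On little endian the significance of bytes within a register
	 * is the reverse of their order in memory, so REV both words
	 * first; the unsigned comparison then ranks them exactly as a
	 * bytewise compare would.  CSET/CNEG turn the resulting flags
	 * into 0, 1 or -1 without a branch.
	 */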
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
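	/*
	 * limit holds n - 8 here, so each ADDS below both unwinds part
	 * of the bias and tests the remaining length; after the ADDS in
	 * L(less4), limit is the exact number of bytes left.
	 */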
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	adds	limit, limit, 4
	beq	L(ret_eq)
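	/*
	 * limit now holds the exact byte count.  The CCMP skips the
	 * data compare once SUBS has brought limit to zero, forcing
	 * NZCV to 0 (NE) to break the loop; the final SUB then yields
	 * the signed difference of the last pair loaded, which is zero
	 * when they match.
	 */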
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)