1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) |
4 | * |
5 | * Finds length of a 0-terminated string. Optimized for the |
6 | * Alpha architecture: |
7 | * |
8 | * - memory accessed as aligned quadwords only |
9 | * - uses cmpbge to compare 8 bytes in parallel |
10 | * - does binary search to find 0 byte in last |
11 | * quadword (HAKMEM needed 12 instructions to |
12 | * do this instead of the 9 instructions that |
13 | * binary search needs). |
14 | */ |
15 | #include <linux/export.h> |
16 | .set noreorder /* ask the assembler not to reorder the instructions below */ |
17 | .set noat /* the assembler must not use $at as a scratch register */ |
18 | |
19 | .align 3 /* place the routine on a 2^3 = 8 byte boundary */ |
20 | /* strlen: finds the length of the 0-terminated string whose address is in $16. */ |
21 | .globl strlen /* make the symbol visible outside this file */ |
22 | .ent strlen /* start of procedure strlen, closed by .end below */ |
23 | /* In: $16 = string address (any alignment). Out: $0 = length. Overwrites $1, $2, $3. Returns via $26. */ |
24 | strlen: |
25 | ldq_u $1, 0($16) # load the aligned quadword that contains the first byte ($16 may be misaligned) |
26 | lda $2, -1($31) /* $2 = -1, i.e. all bits set ($31 reads as zero) */ |
27 | insqh $2, $16, $2 /* $2 = 0xff in each byte lane that lies before the string start, 0 elsewhere */ |
28 | andnot $16, 7, $0 /* $0 = $16 rounded down to a multiple of 8 */ |
29 | or $2, $1, $1 /* force the bytes before the start to non-zero so they cannot match */ |
30 | cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 |
31 | bne $2, found /* the terminator is already in the first quadword */ |
32 | /* Scan the following aligned quadwords until one contains a 0 byte. */ |
33 | loop: ldq $1, 8($0) /* fetch the quadword after the one at $0 */ |
34 | addq $0, 8, $0 # addr += 8 |
35 | nop # helps dual issue last two insns |
36 | cmpbge $31, $1, $2 /* same zero-byte bitmask as for the first quadword */ |
37 | beq $2, loop /* no 0 byte in this quadword: keep scanning */ |
38 | /* Here $0 = address of the quadword holding the terminator and $2 = its non-zero zero-byte bitmask. */ |
39 | found: blbs $2, done # bit 0 set: byte 0 of the quadword is the terminator (make aligned case fast) |
40 | negq $2, $3 /* $3 = -$2 */ |
41 | and $2, $3, $2 /* $2 & -$2 keeps only the lowest set bit, i.e. the first 0 byte */ |
42 | /* Binary search for the index of that single bit, adding it to $0 in steps of 4, 2 and 1. */ |
43 | and $2, 0x0f, $1 /* $1 == 0 <==> the bit is not in positions 0-3 */ |
44 | addq $0, 4, $3 /* candidate address if the bit is in the upper half */ |
45 | cmoveq $1, $3, $0 /* if $1 == 0 then $0 += 4 */ |
46 | |
47 | and $2, 0x33, $1 /* $1 == 0 <==> the bit is in position 2, 3, 6 or 7 */ |
48 | addq $0, 2, $3 /* uses $0 as updated by the previous step */ |
49 | cmoveq $1, $3, $0 /* if $1 == 0 then $0 += 2 */ |
50 | |
51 | and $2, 0x55, $1 /* $1 == 0 <==> the bit is in an odd position */ |
52 | addq $0, 1, $3 /* uses $0 as updated by the previous step */ |
53 | cmoveq $1, $3, $0 /* if $1 == 0 then $0 += 1 */ |
54 | /* At done, $0 holds the address of the terminating 0 byte. */ |
55 | done: subq $0, $16, $0 /* length = terminator address - string start */ |
56 | ret $31, ($26) /* return to the address held in $26 */ |
57 | |
58 | .end strlen /* end of procedure strlen (matches .ent above) */ |
59 | EXPORT_SYMBOL(strlen) /* macro from <linux/export.h>; presumably exports strlen to modules - confirm */ |
60 | |