1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * arch/alpha/lib/ev67-strcat.S |
4 | * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> |
5 | * |
6 | * Append a null-terminated string from SRC to DST. |
7 | * |
8 | * Much of the information about 21264 scheduling/coding comes from: |
9 | * Compiler Writer's Guide for the Alpha 21264 |
10 | * abbreviated as 'CWG' in other comments here |
11 | * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html |
12 | * Scheduling notation: |
13 | * E - either cluster |
14 | * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 |
15 | * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 |
16 | * Try not to change the actual algorithm if possible for consistency. |
17 | * Commentary: It seems bogus to walk the input string twice - once |
18 | * to determine the length, and then again while doing the copy. |
19 | * A significant (future) enhancement would be to only read the input |
20 | * string once. |
21 | */ |
22 | |
23 | #include <linux/export.h> |
24 | .text |
25 | |
26 | .align 4 |
27 | .globl strcat |
28 | .ent strcat |
29 | strcat: |
30 | .frame $30, 0, $26 |
31 | .prologue 0 |
32 | |
33 | mov $16, $0 # E : set up return value (the incoming $16, before it is advanced below) |
34 | /* strcat: on entry $16 is the destination pointer (a0). Step 1: find the end of the string at $16, leaving $16 pointing at its terminating NUL byte. */ |
35 | ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned); ldq_u ignores the low 3 address bits |
36 | lda $2, -1 # E : $2 = -1, i.e. every byte 0xff |
37 | insqh $2, $16, $2 # U : $2 = 0xff in the byte lanes below $16's offset within the quadword, 0 elsewhere |
38 | |
39 | andnot $16, 7, $16 # E : round $16 down to an 8-byte boundary for the aligned loads in the loop |
40 | or $2, $1, $1 # E : force the bytes that precede the string to non-zero so they cannot match as NUL |
41 | cmpbge $31, $1, $2 # E : bits set iff byte == 0 |
42 | bne $2, $found # U : a NUL byte is already present in the first quadword |
43 | |
44 | $loop: ldq $1, 8($16) # L : load the next aligned quadword |
45 | addq $16, 8, $16 # E : advance $16 to the quadword just loaded |
46 | cmpbge $31, $1, $2 # E : bits set iff byte == 0 (same test as before the loop) |
47 | beq $2, $loop # U : no NUL byte in this quadword, keep scanning |
48 | |
49 | $found: cttz $2, $3 # U0 : $3 = index of the lowest set bit in $2 = byte offset of the first NUL in the quadword |
50 | addq $16, $3, $16 # E : $16 = address of the terminating NUL byte |
51 | /* Now do the append: $16 is where copying must start. __stxcpy is defined elsewhere; presumably it copies from $17 (never modified here) -- confirm against __stxcpy. */ |
52 | mov $26, $23 # E : copy the return address ($26, per .frame above) into $23 |
53 | br __stxcpy # L0 : plain branch, no link; presumably __stxcpy returns to our caller through $23 -- confirm |
54 | |
55 | .end strcat |
56 | EXPORT_SYMBOL(strcat) |
57 | |