1 | /* |
2 | * strcpy |
3 | * |
4 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | * See https://llvm.org/LICENSE.txt for license information. |
6 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | */ |
8 | |
9 | #if defined (__thumb2__) && !defined (__thumb__) |
10 | |
11 | /* For GLIBC: |
12 | #include <string.h> |
13 | #include <memcopy.h> |
14 | |
15 | #undef strcpy |
16 | */ |
17 | |
/* Operands for the "does this word contain a zero byte" test used by
   __strcpy_arm: magic1 is the value subtracted from the word (0x01010101)
   and magic2 is the mask applied afterwards (0x80808080).
   With __thumb2__ defined both expand to immediate operands and REG is
   ignored.  Otherwise magic1 expands to the register name itself and magic2
   to that register shifted left by 7, so REG must already hold 0x01010101
   (0x01010101 << 7 == 0x80808080).
   NOTE(review): the #if that opens this file already requires __thumb2__,
   so the #else branch looks unreachable here -- confirm before relying
   on it.  */
18 | #ifdef __thumb2__ |
19 | #define magic1(REG) "#0x01010101" |
20 | #define magic2(REG) "#0x80808080" |
21 | #else |
22 | #define magic1(REG) #REG |
23 | #define magic2(REG) #REG ", lsl #7" |
24 | #endif |
25 | |
/* Copy the NUL-terminated string at src to dst, terminator included.

   The function is naked, so the asm statement below is the whole body: it
   takes dst in r0 and src in r1 (the standard ARM argument registers) and
   returns with BX LR.  r0 is copied to ip on entry and never written again,
   so every exit path returns with r0 still holding dst.

   Strategy:
     - if dst and src differ in their low two address bits, copy byte by
       byte (label 4);
     - otherwise copy a leading byte and/or halfword until src is word
       aligned (label 3), then copy a word at a time (labels 5 and 2),
       testing each word for a zero byte with
       (w - 0x01010101) & ~w & 0x80808080;
     - the word that contains the terminator is stored one byte at a time
       (the first label 1), so nothing is written past the terminator.

   The word loop loads whole words and fetches the following word before
   the current one has been tested, so bytes after the terminator may be
   read (never written).

   r4 is saved on the stack around the word path; r5 is additionally saved
   and loaded with 0x01010101 when __thumb2__ is not defined.

   NOTE(review): strneh/streqb below are the pre-UAL spellings of
   strhne/strbeq; confirm the assembler accepts them in the syntax mode this
   file is built with.  */
26 | char* __attribute__((naked)) |
27 | __strcpy_arm (char* dst, const char* src) |
28 | { |
29 | __asm__ ( |
30 | "pld [r1, #0]\n\t" /* prefetch hint for the start of src */ |
31 | "eor r2, r0, r1\n\t" /* r2 = dst ^ src */ |
32 | "mov ip, r0\n\t" /* ip = running dst pointer; r0 is kept as the return value */ |
33 | "tst r2, #3\n\t" /* do dst and src differ in their low two bits? */ |
34 | "bne 4f\n\t" /* yes: plain byte-by-byte copy */ |
35 | "tst r1, #3\n\t" /* is src word aligned? */ |
36 | "bne 3f\n" /* no: copy the leading byte/halfword first */ |
37 | "5:\n\t" /* word path: src and dst are both word aligned here */ |
38 | # ifndef __thumb2__ |
39 | "str r5, [sp, #-4]!\n\t" /* save r5 */ |
40 | "mov r5, #0x01\n\t" /* build r5 = 0x01010101 in three steps */ |
41 | "orr r5, r5, r5, lsl #8\n\t" |
42 | "orr r5, r5, r5, lsl #16\n\t" |
43 | # endif |
44 | |
45 | "str r4, [sp, #-4]!\n\t" /* save r4 */ |
46 | "tst r1, #4\n\t" /* flags: is src 8-byte aligned? (the ldr below leaves them alone) */ |
47 | "ldr r3, [r1], #4\n\t" /* r3 = first word, src += 4 */ |
48 | "beq 2f\n\t" /* src was 8-byte aligned: enter the two-word loop */ |
49 | "sub r2, r3, " magic1(r5)"\n\t" /* r2 = r3 - 0x01010101 */ |
50 | "bics r2, r2, r3\n\t" /* r2 &= ~r3 */ |
51 | "tst r2, " magic2(r5)"\n\t" /* non-zero result <=> r3 contains a zero byte */ |
52 | "itt eq\n\t" |
53 | "streq r3, [ip], #4\n\t" /* no zero byte: store the word ... */ |
54 | "ldreq r3, [r1], #4\n" /* ... and fetch the next one */ |
55 | "bne 1f\n\t" /* zero byte in r3: finish byte-wise */ |
56 | /* Inner loop. We now know that r1 is 64-bit aligned, so we |
57 | can safely fetch up to two words. This allows us to avoid |
58 | load stalls. */ |
59 | ".p2align 2\n" |
60 | "2:\n\t" |
61 | "pld [r1, #8]\n\t" /* prefetch hint ahead of the loads */ |
62 | "ldr r4, [r1], #4\n\t" /* r4 = following word, loaded before r3 is tested */ |
63 | "sub r2, r3, " magic1(r5)"\n\t" /* zero-byte test on r3 (same sequence as above) */ |
64 | "bics r2, r2, r3\n\t" |
65 | "tst r2, " magic2(r5)"\n\t" |
66 | "sub r2, r4, " magic1(r5)"\n\t" /* start the test on r4; sub does not touch the flags */ |
67 | "bne 1f\n\t" /* r3 contains the terminator */ |
68 | "str r3, [ip], #4\n\t" /* r3 is clean: store it */ |
69 | "bics r2, r2, r4\n\t" /* finish the zero-byte test on r4 */ |
70 | "tst r2, " magic2(r5)"\n\t" |
71 | "itt eq\n\t" |
72 | "ldreq r3, [r1], #4\n\t" /* r4 is clean: fetch the next r3 ... */ |
73 | "streq r4, [ip], #4\n\t" /* ... and store r4 */ |
74 | "beq 2b\n\t" |
75 | "mov r3, r4\n" /* terminator is in r4: hand that word to the byte loop */ |
76 | "1:\n\t" /* store r3 byte by byte, in memory order, up to and including the zero byte */ |
77 | # ifdef __ARMEB__ |
78 | "rors r3, r3, #24\n\t" /* big-endian: rotate the next byte in memory order into bits 7:0 */ |
79 | # endif |
80 | "strb r3, [ip], #1\n\t" /* store that byte */ |
81 | "tst r3, #0xff\n\t" /* was it the terminator? */ |
82 | # ifdef __ARMEL__ |
83 | "ror r3, r3, #8\n\t" /* little-endian: bring the next byte down; flags from tst are kept */ |
84 | # endif |
85 | "bne 1b\n\t" /* not the terminator: store the next byte */ |
86 | "ldr r4, [sp], #4\n\t" /* restore r4 */ |
87 | # ifndef __thumb2__ |
88 | "ldr r5, [sp], #4\n\t" /* restore r5 */ |
89 | # endif |
90 | "BX LR\n" /* return; r0 still holds dst */ |
91 | |
92 | /* Strings have the same offset from word alignment, but it's |
93 | not zero. */ |
94 | "3:\n\t" |
95 | "tst r1, #1\n\t" /* is src at an odd address? */ |
96 | "beq 1f\n\t" /* no: skip the single-byte step */ |
97 | "ldrb r2, [r1], #1\n\t" /* copy one byte */ |
98 | "strb r2, [ip], #1\n\t" |
99 | "cmp r2, #0\n\t" /* was it the terminator? */ |
100 | "it eq\n" |
101 | "BXEQ LR\n" /* yes: done */ |
102 | "1:\n\t" |
103 | "tst r1, #2\n\t" /* is src halfword- but not word-aligned? */ |
104 | "beq 5b\n\t" /* no: already word aligned, go to the word path */ |
105 | "ldrh r2, [r1], #2\n\t" /* load the next two bytes */ |
106 | # ifdef __ARMEB__ |
107 | "tst r2, #0xff00\n\t" /* big-endian: first byte in memory is bits 15:8 */ |
108 | "iteet ne\n\t" |
109 | "strneh r2, [ip], #2\n\t" /* first byte non-zero: store both bytes */ |
110 | "lsreq r2, r2, #8\n\t" /* first byte zero: move it down to bits 7:0 ... */ |
111 | "streqb r2, [ip]\n\t" /* ... and store only the terminator */ |
112 | "tstne r2, #0xff\n\t" /* first byte non-zero: now test the second byte */ |
113 | # else |
114 | "tst r2, #0xff\n\t" /* little-endian: first byte in memory is bits 7:0 */ |
115 | "itet ne\n\t" |
116 | "strneh r2, [ip], #2\n\t" /* first byte non-zero: store both bytes */ |
117 | "streqb r2, [ip]\n\t" /* first byte zero: store only the terminator */ |
118 | "tstne r2, #0xff00\n\t" /* first byte non-zero: now test the second byte */ |
119 | # endif |
120 | "bne 5b\n\t" /* both bytes non-zero: continue on the word path */ |
121 | "BX LR\n" /* a terminator was stored: return */ |
122 | |
123 | /* src and dst do not have a common word-alignment. Fall back to |
124 | byte copying. */ |
125 | "4:\n\t" |
126 | "ldrb r2, [r1], #1\n\t" /* copy one byte */ |
127 | "strb r2, [ip], #1\n\t" |
128 | "cmp r2, #0\n\t" /* stop once the terminator has been stored */ |
129 | "bne 4b\n\t" |
130 | "BX LR" ); |
131 | } |
132 | /* For GLIBC: libc_hidden_builtin_def (strcpy) */ |
133 | |
134 | #endif /* defined (__thumb2__) && !defined (__thumb__) */ |
135 | |