/*
 * memset - fill memory with a constant
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.

 */

	.syntax unified
	.arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@ Extracted from local git 2f11b436

@ This lets us test a flag in a 00/ff byte easily in either endianness
@ (CHARTSTMASK is not actually used by this memset routine itself).
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
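@ For example, CHARTSTMASK(1) expands to 1<<23 on a big-endian build and
@ to 1<<8 on a little-endian build: either way a bit inside the byte at
@ offset 1 (counting from the lowest address) of a 32-bit word.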
	.text
	.thumb

@ ---------------------------------------------------------------------------
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memset_arm
	.type __memset_arm,%function
__memset_arm:
	@ r0 = address
	@ r1 = character
	@ r2 = count
	@ returns original address in r0
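	@ In C terms the contract matches standard memset (a sketch of the
	@ register mapping, not additional behaviour):
	@   void *memset(void *s, int c, size_t n);  /* s=r0, c=r1, n=r2, returns s */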

	mov	r3, r0			@ Leave r0 alone so it can be returned unchanged
	cbz	r2, 10f			@ Exit if 0 length

	tst	r0, #7
	beq	2f			@ Already 8-byte aligned

	@ Misaligned: store bytes one at a time until the pointer is
	@ 8-byte aligned (or the count runs out).
1:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7
	cbz	r2, 10f			@ Exit if we hit the end (cbz leaves the tst flags intact)
	bne	1b			@ Go round again if still misaligned

2:
	@ OK, so we're 8-byte aligned now
	push	{r4, r5, r6, r7}	@ r4-r7 are callee-saved (AAPCS), so save them
	bics	r4, r2, #15		@ r4 = count rounded down to a multiple of 16
	beq	5f			@ Fewer than 16 bytes: just finish off below

3:
	@ POSIX says that ch is cast to an unsigned char.  A uxtb is a
	@ two-byte encoding but takes two cycles, whereas the AND is four
	@ bytes but only one cycle.
	and	r1, #0xFF
	orr	r1, r1, r1, lsl #8	@ Same character into all bytes
	orr	r1, r1, r1, lsl #16
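	@ e.g. if the caller passed c = 0x41, r1 is now 0x41414141, so every
	@ word stored below fills four bytes with the character.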
	mov	r5, r1			@ Copy the pattern into r5-r7 so a single
	mov	r6, r1			@ stmia below can write four identical words
	mov	r7, r1

4:
	subs	r4, r4, #16
	stmia	r3!, {r1, r5, r6, r7}	@ Write 16 bytes per iteration
	bne	4b
	and	r2, r2, #15		@ 0-15 bytes still to write

	@ At this point we're still 8-byte aligned and have up to 15 bytes
	@ left to write; avoid some of the byte-at-a-time work by testing
	@ for an 8-byte chunk first.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1, r5}		@ Write 8 bytes if bit 3 of the count is set
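	@ e.g. with 13 trailing bytes, bit 3 of r2 is set, so the stm above
	@ writes 8 of them and the byte loop at 6: below writes the last 5.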

5:
	pop	{r4, r5, r6, r7}
	cbz	r2, 10f			@ Done if nothing left

	@ Write any remaining trailing bytes one at a time
6:
	subs	r2, r2, #1
	strb	r1, [r3], #1
	bne	6b

10:
	bx	lr			@ Goodbye
	.size	__memset_arm, . - __memset_arm
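
@ Example C-side use (a sketch; the prototype below and the idea that a
@ library build would alias this symbol to memset are assumptions, not
@ stated in this file):
@   extern void *__memset_arm(void *s, int c, size_t n);
@   __memset_arm(buf, 0, sizeof buf);   /* zero a buffer */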