#include <stddef.h>
#include <stdint.h>
2 | |
3 | // WARNING: When building the scalar versions of these functions you need to |
4 | // use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang |
5 | // from recognising a loop idiom and planting calls to memcpy! |
6 | |
// Copies n bytes from src to dest one byte at a time, starting at the lowest
// address and working upwards. Returns dest.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  const unsigned char *in = (const unsigned char *)src;
  unsigned char *out = (unsigned char *)dest;

  // Walk both cursors upwards until every byte has been transferred.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = *in++;

  return dest;
}
16 | |
// memcpy makes no promises when dest and src overlap (the behaviour is
// undefined), which is what permits both pointers to be restrict-qualified
// here. This also matches how the libc memcpy is declared according to its
// man page. The copy itself is delegated to the ascending byte-copy helper
// and that helper's return value is handed back to the caller.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  void *const copied = __arm_sc_memcpy_fwd(dest, src, n);
  return copied;
}
24 | |
// Fills the first n bytes at dest with the value of c converted to
// unsigned char. Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *cursor = (unsigned char *)dest;

  // Store the fill byte at ascending addresses until n bytes are written.
  for (size_t remaining = n; remaining != 0; --remaining)
    *cursor++ = fill;

  return dest;
}
33 | |
// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and working downwards. Returns dest.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  const unsigned char *in = (const unsigned char *)src;
  unsigned char *out = (unsigned char *)dest;

  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  // idx counts the bytes still to copy, so the byte handled on each
  // iteration sits at offset idx - 1.
  for (size_t idx = n; idx != 0; --idx)
    out[idx - 1] = in[idx - 1];

  return dest;
}
46 | |
// Copies n bytes from src to dest, where the two regions are allowed to
// overlap. Returns whatever the selected copy helper returns.
//
// Semantically a memmove is equivalent to the following:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
// No temporary is actually used; instead the copy direction is chosen so that
// no source byte is overwritten before it has been read.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  // Work with integer addresses rather than pointers: dest and src may belong
  // to unrelated objects, and relational comparison of such pointers is
  // undefined behaviour in C.
  const uintptr_t dest_addr = (uintptr_t)dest;
  const uintptr_t src_addr = (uintptr_t)src;

  // An ascending copy is safe in two situations:
  //
  //   a) dest starts at or below src:
  //        src:        Low |   ->   | High
  //        dest: Low |   ->   | High
  //      src is always ahead of dest at a higher address. If we first read a
  //      chunk of data from src we can safely write the same chunk to dest
  //      without corrupting future reads of src.
  //
  //   b) dest starts at or beyond the end of src, i.e. the regions do not
  //      overlap at all. dest_addr > src_addr is already known when the
  //      second operand is evaluated, so the subtraction cannot wrap and no
  //      "address + n" sum (which could overflow) is ever formed.
  if (dest_addr <= src_addr || dest_addr - src_addr >= n)
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Remaining case: dest starts strictly inside the src region.
  //   src:  Low |   ->   | High
  //   dest:       Low |   ->   | High
  // While we're in the overlap region an ascending copy would corrupt future
  // reads of src when writing to dest, so copy the data in reverse by
  // starting at the highest address.
  return __arm_sc_memcpy_rev(dest, src, n);
}
77 | |
// Scans the first n bytes at src for the value of c converted to
// unsigned char. Returns a pointer to the first matching byte, or NULL when
// none of the n bytes match.
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char wanted = (unsigned char)c;
  const unsigned char *cursor = (const unsigned char *)src;

  // Advance one byte at a time and stop at the first match.
  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
    if (*cursor == wanted)
      return cursor;
  }

  return NULL;
}
88 | |