1 | /* Measure memmove function combined throughput for different alignments. |
2 | Copyright (C) 2017-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
/* This microbenchmark measures the throughput of memmove for sizes starting
   at START_SIZE and doubling every iteration up to 32MiB.  Each size is run
   as-is and again with one extra byte, so that successive copies in a walk
   also start at varying, misaligned addresses.  Every measurement walks the
   whole buffer LEN bytes at a time: the destination walks forward across
   one array while the source walks backward across the other.  Results are
   reported for non-overlapping, partially-overlapping and
   completely-overlapping configurations.  The idea is to avoid caching
   effects by copying a different string each time, far enough from each
   other and walking in different directions, so that we can measure
   prefetcher efficiency (software or hardware) more closely than with a
   loop copying the same data over and over, which eventually only gives us
   L1 cache performance.  */
29 | |
30 | #ifndef MEMMOVE_RESULT |
31 | # define MEMMOVE_RESULT(dst, len) dst |
32 | # define START_SIZE 128 |
33 | # define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024) |
34 | # define TEST_MAIN |
35 | # define TEST_NAME "memmove" |
36 | # define TIMEOUT (20 * 60) |
37 | # include "bench-string.h" |
38 | |
39 | #define NO_OVERLAP 0 |
40 | #define PARTIAL_OVERLAP 1 |
41 | #define COMPLETE_OVERLAP 2 |
42 | |
43 | IMPL (memmove, 1) |
44 | #endif |
45 | |
46 | #include "json-lib.h" |
47 | |
48 | typedef char *(*proto_t) (char *, const char *, size_t); |
49 | |
50 | static void |
51 | do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src, |
52 | size_t len) |
53 | { |
54 | size_t i = 0; |
55 | timing_t start, stop, cur; |
56 | |
57 | char *dst_end = dst + MIN_PAGE_SIZE - len; |
58 | char *src_end = src + MIN_PAGE_SIZE - len; |
59 | |
60 | TIMING_NOW (start); |
61 | /* Copy the entire buffer backwards, LEN at a time. */ |
62 | for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++) |
63 | CALL (impl, dst, src_end, len); |
64 | TIMING_NOW (stop); |
65 | |
66 | TIMING_DIFF (cur, start, stop); |
67 | |
68 | /* Get time taken per function call. */ |
69 | json_element_double (ctx: json_ctx, d: (double) cur / i); |
70 | } |
71 | |
72 | static void |
73 | do_test (json_ctx_t *json_ctx, size_t len, int overlap, int both_ways) |
74 | { |
75 | char *s1, *s2, *tmp; |
76 | size_t repeats; |
77 | |
78 | s1 = (char *) (buf1); |
79 | s2 = (char *) (buf2); |
80 | if (overlap != NO_OVERLAP) |
81 | s2 = s1; |
82 | if (overlap == PARTIAL_OVERLAP) |
83 | s2 += len / 2; |
84 | |
85 | for (repeats = both_ways ? 2 : 1; repeats; --repeats) |
86 | { |
87 | json_element_object_begin (ctx: json_ctx); |
88 | json_attr_uint (ctx: json_ctx, name: "length" , d: (double) len); |
89 | json_attr_string(ctx: json_ctx, name: "overlap" , |
90 | s: overlap == NO_OVERLAP ? "none" |
91 | : overlap == PARTIAL_OVERLAP ? "partial" |
92 | : "complete" ); |
93 | json_attr_uint (ctx: json_ctx, name: "dst > src" , d: (double) (s2 > s1)); |
94 | json_array_begin (ctx: json_ctx, name: "timings" ); |
95 | |
96 | |
97 | FOR_EACH_IMPL (impl, 0) |
98 | do_one_test (json_ctx, impl, dst: (char *) buf2, src: (char *) buf1, len); |
99 | |
100 | json_array_end (ctx: json_ctx); |
101 | json_element_object_end (ctx: json_ctx); |
102 | |
103 | tmp = s1; |
104 | s1 = s2; |
105 | s2 = tmp; |
106 | } |
107 | } |
108 | |
109 | int |
110 | test_main (void) |
111 | { |
112 | json_ctx_t json_ctx; |
113 | |
114 | test_init (); |
115 | |
116 | json_init (ctx: &json_ctx, indent_level: 0, stdout); |
117 | |
118 | json_document_begin (ctx: &json_ctx); |
119 | json_attr_string (ctx: &json_ctx, name: "timing_type" , TIMING_TYPE); |
120 | |
121 | json_attr_object_begin (ctx: &json_ctx, name: "functions" ); |
122 | json_attr_object_begin (ctx: &json_ctx, name: "memmove" ); |
123 | json_attr_string (ctx: &json_ctx, name: "bench-variant" , s: "walk" ); |
124 | |
125 | json_array_begin (ctx: &json_ctx, name: "ifuncs" ); |
126 | FOR_EACH_IMPL (impl, 0) |
127 | json_element_string (ctx: &json_ctx, s: impl->name); |
128 | json_array_end (ctx: &json_ctx); |
129 | |
130 | json_array_begin (ctx: &json_ctx, name: "results" ); |
131 | /* Non-overlapping buffers. */ |
132 | for (size_t i = START_SIZE; i <= MIN_PAGE_SIZE; i <<= 1) |
133 | { |
134 | do_test (json_ctx: &json_ctx, len: i, NO_OVERLAP, both_ways: 1); |
135 | do_test (json_ctx: &json_ctx, len: i + 1, NO_OVERLAP, both_ways: 1); |
136 | } |
137 | |
138 | /* Partially-overlapping buffers. */ |
139 | for (size_t i = START_SIZE; i <= MIN_PAGE_SIZE / 2; i <<= 1) |
140 | { |
141 | do_test (json_ctx: &json_ctx, len: i, PARTIAL_OVERLAP, both_ways: 1); |
142 | do_test (json_ctx: &json_ctx, len: i + 1, PARTIAL_OVERLAP, both_ways: 1); |
143 | } |
144 | |
145 | /* Complete-overlapping buffers. */ |
146 | for (size_t i = START_SIZE; i <= MIN_PAGE_SIZE; i <<= 1) |
147 | { |
148 | do_test (json_ctx: &json_ctx, len: i, COMPLETE_OVERLAP, both_ways: 0); |
149 | do_test (json_ctx: &json_ctx, len: i + 1, COMPLETE_OVERLAP, both_ways: 0); |
150 | } |
151 | |
152 | json_array_end (ctx: &json_ctx); |
153 | json_attr_object_end (ctx: &json_ctx); |
154 | json_attr_object_end (ctx: &json_ctx); |
155 | json_document_end (ctx: &json_ctx); |
156 | |
157 | return ret; |
158 | } |
159 | |
160 | #include <support/test-driver.c> |
161 | |