1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * fill_buf benchmark |
4 | * |
5 | * Copyright (C) 2018 Intel Corporation |
6 | * |
7 | * Authors: |
8 | * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, |
9 | * Fenghua Yu <fenghua.yu@intel.com> |
10 | */ |
11 | #include <stdio.h> |
12 | #include <unistd.h> |
13 | #include <stdlib.h> |
14 | #include <sys/types.h> |
15 | #include <sys/wait.h> |
16 | #include <inttypes.h> |
17 | #include <string.h> |
18 | |
19 | #include "resctrl.h" |
20 | |
21 | #define CL_SIZE (64) |
22 | #define PAGE_SIZE (4 * 1024) |
23 | #define MB (1024 * 1024) |
24 | |
/*
 * sb - Store barrier: make all prior stores globally visible before any
 * later ones. No-op on non-x86 builds.
 *
 * Use __asm__/__volatile__ rather than the bare GNU "asm" keyword, which
 * is disabled when compiling in strict ISO mode (e.g. -std=c11).
 */
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	__asm__ __volatile__("sfence\n\t"
		     : : : "memory");
#endif
}
32 | |
/*
 * cl_flush - Flush the cache line containing @p from every cache level.
 * No-op on non-x86 builds.
 *
 * Use __asm__/__volatile__ rather than the bare GNU "asm" keyword, which
 * is disabled when compiling in strict ISO mode (e.g. -std=c11).
 */
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	__asm__ __volatile__("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#endif
}
40 | |
41 | void mem_flush(unsigned char *buf, size_t buf_size) |
42 | { |
43 | unsigned char *cp = buf; |
44 | size_t i = 0; |
45 | |
46 | buf_size = buf_size / CL_SIZE; /* mem size in cache lines */ |
47 | |
48 | for (i = 0; i < buf_size; i++) |
49 | cl_flush(p: &cp[i * CL_SIZE]); |
50 | |
51 | sb(); |
52 | } |
53 | |
54 | /* |
55 | * Buffer index step advance to workaround HW prefetching interfering with |
56 | * the measurements. |
57 | * |
58 | * Must be a prime to step through all indexes of the buffer. |
59 | * |
60 | * Some primes work better than others on some architectures (from MBA/MBM |
61 | * result stability point of view). |
62 | */ |
63 | #define FILL_IDX_MULT 23 |
64 | |
65 | static int fill_one_span_read(unsigned char *buf, size_t buf_size) |
66 | { |
67 | unsigned int size = buf_size / (CL_SIZE / 2); |
68 | unsigned int i, idx = 0; |
69 | unsigned char sum = 0; |
70 | |
71 | /* |
72 | * Read the buffer in an order that is unexpected by HW prefetching |
73 | * optimizations to prevent them interfering with the caching pattern. |
74 | * |
75 | * The read order is (in terms of halves of cachelines): |
76 | * i * FILL_IDX_MULT % size |
77 | * The formula is open-coded below to avoiding modulo inside the loop |
78 | * as it improves MBA/MBM result stability on some architectures. |
79 | */ |
80 | for (i = 0; i < size; i++) { |
81 | sum += buf[idx * (CL_SIZE / 2)]; |
82 | |
83 | idx += FILL_IDX_MULT; |
84 | while (idx >= size) |
85 | idx -= size; |
86 | } |
87 | |
88 | return sum; |
89 | } |
90 | |
91 | static void fill_one_span_write(unsigned char *buf, size_t buf_size) |
92 | { |
93 | unsigned char *end_ptr = buf + buf_size; |
94 | unsigned char *p; |
95 | |
96 | p = buf; |
97 | while (p < end_ptr) { |
98 | *p = '1'; |
99 | p += (CL_SIZE / 2); |
100 | } |
101 | } |
102 | |
103 | void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) |
104 | { |
105 | int ret = 0; |
106 | |
107 | while (1) { |
108 | ret = fill_one_span_read(buf, buf_size); |
109 | if (once) |
110 | break; |
111 | } |
112 | |
113 | /* Consume read result so that reading memory is not optimized out. */ |
114 | *value_sink = ret; |
115 | } |
116 | |
/*
 * fill_cache_write - Run the write workload over @buf, either a single
 * pass (@once) or forever.
 */
static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
	do {
		fill_one_span_write(buf, buf_size);
	} while (!once);
}
125 | |
126 | unsigned char *alloc_buffer(size_t buf_size, int memflush) |
127 | { |
128 | void *buf = NULL; |
129 | uint64_t *p64; |
130 | size_t s64; |
131 | int ret; |
132 | |
133 | ret = posix_memalign(&buf, PAGE_SIZE, buf_size); |
134 | if (ret < 0) |
135 | return NULL; |
136 | |
137 | /* Initialize the buffer */ |
138 | p64 = buf; |
139 | s64 = buf_size / sizeof(uint64_t); |
140 | |
141 | while (s64 > 0) { |
142 | *p64 = (uint64_t)rand(); |
143 | p64 += (CL_SIZE / sizeof(uint64_t)); |
144 | s64 -= (CL_SIZE / sizeof(uint64_t)); |
145 | } |
146 | |
147 | /* Flush the memory before using to avoid "cache hot pages" effect */ |
148 | if (memflush) |
149 | mem_flush(buf, buf_size); |
150 | |
151 | return buf; |
152 | } |
153 | |
/*
 * run_fill_buf - Allocate a benchmark buffer and run one workload on it.
 * @buf_size: buffer size in bytes
 * @memflush: non-zero to flush the fresh buffer from the caches
 * @op:       0 for the read workload, anything else for the write workload
 * @once:     true to run a single pass instead of looping forever
 *
 * Return: 0 on success, -1 if the buffer could not be allocated.
 */
int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
	unsigned char *mem;

	mem = alloc_buffer(buf_size, memflush);
	if (!mem)
		return -1;

	if (op)
		fill_cache_write(mem, buf_size, once);
	else
		fill_cache_read(mem, buf_size, once);

	free(mem);

	return 0;
}
170 | |