1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Cache Allocation Technology (CAT) test |
4 | * |
5 | * Copyright (C) 2018 Intel Corporation |
6 | * |
7 | * Authors: |
8 | * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, |
9 | * Fenghua Yu <fenghua.yu@intel.com> |
10 | */ |
11 | #include "resctrl.h" |
12 | #include <unistd.h> |
13 | |
14 | #define RESULT_FILE_NAME "result_cat" |
15 | #define NUM_OF_RUNS 5 |
16 | |
/*
 * The minimum difference in LLC misses between a test with an (n+1)-bit CBM
 * and a test with an n-bit CBM is MIN_DIFF_PERCENT_PER_BIT * (n - 1). E.g.,
 * with 5 vs 4 bits in the CBM, the minimum difference must be at least
 * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
 *
 * The relationship between the number of used CBM bits and the difference in
 * LLC misses is not expected to be linear. With a small number of bits, the
 * margin is smaller than with a larger number of bits. For selftest purposes,
 * however, a linear approach is enough because ultimately only a pass/fail
 * decision has to be made and the distinction between a strong and a stronger
 * signal is irrelevant.
 */
30 | #define MIN_DIFF_PERCENT_PER_BIT 1UL |
31 | |
32 | static int show_results_info(__u64 sum_llc_val, int no_of_bits, |
33 | unsigned long cache_span, |
34 | unsigned long min_diff_percent, |
35 | unsigned long num_of_runs, bool platform, |
36 | __s64 *prev_avg_llc_val) |
37 | { |
38 | __u64 avg_llc_val = 0; |
39 | float avg_diff; |
40 | int ret = 0; |
41 | |
42 | avg_llc_val = sum_llc_val / num_of_runs; |
43 | if (*prev_avg_llc_val) { |
44 | float delta = (__s64)(avg_llc_val - *prev_avg_llc_val); |
45 | |
46 | avg_diff = delta / *prev_avg_llc_val; |
47 | ret = platform && (avg_diff * 100) < (float)min_diff_percent; |
48 | |
49 | ksft_print_msg(msg: "%s Check cache miss rate changed more than %.1f%%\n" , |
50 | ret ? "Fail:" : "Pass:" , (float)min_diff_percent); |
51 | |
52 | ksft_print_msg(msg: "Percent diff=%.1f\n" , avg_diff * 100); |
53 | } |
54 | *prev_avg_llc_val = avg_llc_val; |
55 | |
56 | show_cache_info(no_of_bits, avg_llc_val, cache_span, lines: true); |
57 | |
58 | return ret; |
59 | } |
60 | |
/*
 * Remove the highest bit from CBM.
 *
 * A bit survives only if the bit directly above it is set as well, so for a
 * contiguous mask exactly the topmost bit is dropped.
 */
static unsigned long next_mask(unsigned long current_mask)
{
	unsigned long shifted = current_mask >> 1;

	return shifted & current_mask;
}
66 | |
67 | static int check_results(struct resctrl_val_param *param, const char *cache_type, |
68 | unsigned long cache_total_size, unsigned long full_cache_mask, |
69 | unsigned long current_mask) |
70 | { |
71 | char *token_array[8], temp[512]; |
72 | __u64 sum_llc_perf_miss = 0; |
73 | __s64 prev_avg_llc_val = 0; |
74 | unsigned long alloc_size; |
75 | int runs = 0; |
76 | int fail = 0; |
77 | int ret; |
78 | FILE *fp; |
79 | |
80 | ksft_print_msg(msg: "Checking for pass/fail\n" ); |
81 | fp = fopen(param->filename, "r" ); |
82 | if (!fp) { |
83 | ksft_perror(msg: "Cannot open file" ); |
84 | |
85 | return -1; |
86 | } |
87 | |
88 | while (fgets(temp, sizeof(temp), fp)) { |
89 | char *token = strtok(temp, ":\t" ); |
90 | int fields = 0; |
91 | int bits; |
92 | |
93 | while (token) { |
94 | token_array[fields++] = token; |
95 | token = strtok(NULL, ":\t" ); |
96 | } |
97 | |
98 | sum_llc_perf_miss += strtoull(token_array[3], NULL, 0); |
99 | runs++; |
100 | |
101 | if (runs < NUM_OF_RUNS) |
102 | continue; |
103 | |
104 | if (!current_mask) { |
105 | ksft_print_msg(msg: "Unexpected empty cache mask\n" ); |
106 | break; |
107 | } |
108 | |
109 | alloc_size = cache_portion_size(cache_size: cache_total_size, portion_mask: current_mask, full_cache_mask); |
110 | |
111 | bits = count_bits(n: current_mask); |
112 | |
113 | ret = show_results_info(sum_llc_val: sum_llc_perf_miss, no_of_bits: bits, |
114 | cache_span: alloc_size / 64, |
115 | MIN_DIFF_PERCENT_PER_BIT * (bits - 1), |
116 | num_of_runs: runs, platform: get_vendor() == ARCH_INTEL, |
117 | prev_avg_llc_val: &prev_avg_llc_val); |
118 | if (ret) |
119 | fail = 1; |
120 | |
121 | runs = 0; |
122 | sum_llc_perf_miss = 0; |
123 | current_mask = next_mask(current_mask); |
124 | } |
125 | |
126 | fclose(fp); |
127 | |
128 | return fail; |
129 | } |
130 | |
131 | void cat_test_cleanup(void) |
132 | { |
133 | remove(RESULT_FILE_NAME); |
134 | } |
135 | |
136 | /* |
137 | * cat_test - Execute CAT benchmark and measure cache misses |
138 | * @test: Test information structure |
139 | * @uparams: User supplied parameters |
140 | * @param: Parameters passed to cat_test() |
141 | * @span: Buffer size for the benchmark |
142 | * @current_mask Start mask for the first iteration |
143 | * |
144 | * Run CAT selftest by varying the allocated cache portion and comparing the |
145 | * impact on cache misses (the result analysis is done in check_results() |
146 | * and show_results_info(), not in this function). |
147 | * |
148 | * One bit is removed from the CAT allocation bit mask (in current_mask) for |
149 | * each subsequent test which keeps reducing the size of the allocated cache |
150 | * portion. A single test flushes the buffer, reads it to warm up the cache, |
151 | * and reads the buffer again. The cache misses are measured during the last |
152 | * read pass. |
153 | * |
154 | * Return: 0 when the test was run, < 0 on error. |
155 | */ |
156 | static int cat_test(const struct resctrl_test *test, |
157 | const struct user_params *uparams, |
158 | struct resctrl_val_param *param, |
159 | size_t span, unsigned long current_mask) |
160 | { |
161 | char *resctrl_val = param->resctrl_val; |
162 | struct perf_event_read pe_read; |
163 | struct perf_event_attr pea; |
164 | cpu_set_t old_affinity; |
165 | unsigned char *buf; |
166 | char schemata[64]; |
167 | int ret, i, pe_fd; |
168 | pid_t bm_pid; |
169 | |
170 | if (strcmp(param->filename, "" ) == 0) |
171 | sprintf(buf: param->filename, fmt: "stdio" ); |
172 | |
173 | bm_pid = getpid(); |
174 | |
175 | /* Taskset benchmark to specified cpu */ |
176 | ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity); |
177 | if (ret) |
178 | return ret; |
179 | |
180 | /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ |
181 | ret = write_bm_pid_to_resctrl(bm_pid, ctrlgrp: param->ctrlgrp, mongrp: param->mongrp, |
182 | resctrl_val); |
183 | if (ret) |
184 | goto reset_affinity; |
185 | |
186 | perf_event_attr_initialize(pea: &pea, config: PERF_COUNT_HW_CACHE_MISSES); |
187 | perf_event_initialize_read_format(pe_read: &pe_read); |
188 | pe_fd = perf_open(pea: &pea, pid: bm_pid, cpu_no: uparams->cpu); |
189 | if (pe_fd < 0) { |
190 | ret = -1; |
191 | goto reset_affinity; |
192 | } |
193 | |
194 | buf = alloc_buffer(buf_size: span, memflush: 1); |
195 | if (!buf) { |
196 | ret = -1; |
197 | goto pe_close; |
198 | } |
199 | |
200 | while (current_mask) { |
201 | snprintf(buf: schemata, size: sizeof(schemata), fmt: "%lx" , param->mask & ~current_mask); |
202 | ret = write_schemata(ctrlgrp: "" , schemata, cpu_no: uparams->cpu, resource: test->resource); |
203 | if (ret) |
204 | goto free_buf; |
205 | snprintf(buf: schemata, size: sizeof(schemata), fmt: "%lx" , current_mask); |
206 | ret = write_schemata(ctrlgrp: param->ctrlgrp, schemata, cpu_no: uparams->cpu, resource: test->resource); |
207 | if (ret) |
208 | goto free_buf; |
209 | |
210 | for (i = 0; i < NUM_OF_RUNS; i++) { |
211 | mem_flush(buf, buf_size: span); |
212 | fill_cache_read(buf, buf_size: span, once: true); |
213 | |
214 | ret = perf_event_reset_enable(pe_fd); |
215 | if (ret) |
216 | goto free_buf; |
217 | |
218 | fill_cache_read(buf, buf_size: span, once: true); |
219 | |
220 | ret = perf_event_measure(pe_fd, pe_read: &pe_read, filename: param->filename, bm_pid); |
221 | if (ret) |
222 | goto free_buf; |
223 | } |
224 | current_mask = next_mask(current_mask); |
225 | } |
226 | |
227 | free_buf: |
228 | free(buf); |
229 | pe_close: |
230 | close(pe_fd); |
231 | reset_affinity: |
232 | taskset_restore(bm_pid, &old_affinity); |
233 | |
234 | return ret; |
235 | } |
236 | |
237 | static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) |
238 | { |
239 | unsigned long long_mask, start_mask, full_cache_mask; |
240 | unsigned long cache_total_size = 0; |
241 | int n = uparams->bits; |
242 | unsigned int start; |
243 | int count_of_bits; |
244 | size_t span; |
245 | int ret; |
246 | |
247 | ret = get_full_cbm(cache_type: test->resource, mask: &full_cache_mask); |
248 | if (ret) |
249 | return ret; |
250 | /* Get the largest contiguous exclusive portion of the cache */ |
251 | ret = get_mask_no_shareable(cache_type: test->resource, mask: &long_mask); |
252 | if (ret) |
253 | return ret; |
254 | |
255 | /* Get L3/L2 cache size */ |
256 | ret = get_cache_size(cpu_no: uparams->cpu, cache_type: test->resource, cache_size: &cache_total_size); |
257 | if (ret) |
258 | return ret; |
259 | ksft_print_msg(msg: "Cache size :%lu\n" , cache_total_size); |
260 | |
261 | count_of_bits = count_contiguous_bits(val: long_mask, start: &start); |
262 | |
263 | if (!n) |
264 | n = count_of_bits / 2; |
265 | |
266 | if (n > count_of_bits - 1) { |
267 | ksft_print_msg(msg: "Invalid input value for no_of_bits n!\n" ); |
268 | ksft_print_msg(msg: "Please enter value in range 1 to %d\n" , |
269 | count_of_bits - 1); |
270 | return -1; |
271 | } |
272 | start_mask = create_bit_mask(start, len: n); |
273 | |
274 | struct resctrl_val_param param = { |
275 | .resctrl_val = CAT_STR, |
276 | .ctrlgrp = "c1" , |
277 | .filename = RESULT_FILE_NAME, |
278 | .num_of_runs = 0, |
279 | }; |
280 | param.mask = long_mask; |
281 | span = cache_portion_size(cache_size: cache_total_size, portion_mask: start_mask, full_cache_mask); |
282 | |
283 | remove(param.filename); |
284 | |
285 | ret = cat_test(test, uparams, param: ¶m, span, current_mask: start_mask); |
286 | if (ret) |
287 | goto out; |
288 | |
289 | ret = check_results(param: ¶m, cache_type: test->resource, |
290 | cache_total_size, full_cache_mask, current_mask: start_mask); |
291 | out: |
292 | cat_test_cleanup(); |
293 | |
294 | return ret; |
295 | } |
296 | |
297 | static int noncont_cat_run_test(const struct resctrl_test *test, |
298 | const struct user_params *uparams) |
299 | { |
300 | unsigned long full_cache_mask, cont_mask, noncont_mask; |
301 | unsigned int eax, ebx, ecx, edx, sparse_masks; |
302 | int bit_center, ret; |
303 | char schemata[64]; |
304 | |
305 | /* Check to compare sparse_masks content to CPUID output. */ |
306 | ret = resource_info_unsigned_get(resource: test->resource, filename: "sparse_masks" , val: &sparse_masks); |
307 | if (ret) |
308 | return ret; |
309 | |
310 | if (!strcmp(test->resource, "L3" )) |
311 | __cpuid_count(0x10, 1, eax, ebx, ecx, edx); |
312 | else if (!strcmp(test->resource, "L2" )) |
313 | __cpuid_count(0x10, 2, eax, ebx, ecx, edx); |
314 | else |
315 | return -EINVAL; |
316 | |
317 | if (sparse_masks != ((ecx >> 3) & 1)) { |
318 | ksft_print_msg(msg: "CPUID output doesn't match 'sparse_masks' file content!\n" ); |
319 | return 1; |
320 | } |
321 | |
322 | /* Write checks initialization. */ |
323 | ret = get_full_cbm(cache_type: test->resource, mask: &full_cache_mask); |
324 | if (ret < 0) |
325 | return ret; |
326 | bit_center = count_bits(n: full_cache_mask) / 2; |
327 | |
328 | /* |
329 | * The bit_center needs to be at least 3 to properly calculate the CBM |
330 | * hole in the noncont_mask. If it's smaller return an error since the |
331 | * cache mask is too short and that shouldn't happen. |
332 | */ |
333 | if (bit_center < 3) |
334 | return -EINVAL; |
335 | cont_mask = full_cache_mask >> bit_center; |
336 | |
337 | /* Contiguous mask write check. */ |
338 | snprintf(buf: schemata, size: sizeof(schemata), fmt: "%lx" , cont_mask); |
339 | ret = write_schemata(ctrlgrp: "" , schemata, cpu_no: uparams->cpu, resource: test->resource); |
340 | if (ret) { |
341 | ksft_print_msg(msg: "Write of contiguous CBM failed\n" ); |
342 | return 1; |
343 | } |
344 | |
345 | /* |
346 | * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle. |
347 | * Output is compared with support information to catch any edge case errors. |
348 | */ |
349 | noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask; |
350 | snprintf(buf: schemata, size: sizeof(schemata), fmt: "%lx" , noncont_mask); |
351 | ret = write_schemata(ctrlgrp: "" , schemata, cpu_no: uparams->cpu, resource: test->resource); |
352 | if (ret && sparse_masks) |
353 | ksft_print_msg(msg: "Non-contiguous CBMs supported but write of non-contiguous CBM failed\n" ); |
354 | else if (ret && !sparse_masks) |
355 | ksft_print_msg(msg: "Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n" ); |
356 | else if (!ret && !sparse_masks) |
357 | ksft_print_msg(msg: "Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n" ); |
358 | |
359 | return !ret == !sparse_masks; |
360 | } |
361 | |
362 | static bool noncont_cat_feature_check(const struct resctrl_test *test) |
363 | { |
364 | if (!resctrl_resource_exists(resource: test->resource)) |
365 | return false; |
366 | |
367 | return resource_info_file_exists(resource: test->resource, file: "sparse_masks" ); |
368 | } |
369 | |
370 | struct resctrl_test l3_cat_test = { |
371 | .name = "L3_CAT" , |
372 | .group = "CAT" , |
373 | .resource = "L3" , |
374 | .feature_check = test_resource_feature_check, |
375 | .run_test = cat_run_test, |
376 | }; |
377 | |
378 | struct resctrl_test l3_noncont_cat_test = { |
379 | .name = "L3_NONCONT_CAT" , |
380 | .group = "CAT" , |
381 | .resource = "L3" , |
382 | .feature_check = noncont_cat_feature_check, |
383 | .run_test = noncont_cat_run_test, |
384 | }; |
385 | |
386 | struct resctrl_test l2_noncont_cat_test = { |
387 | .name = "L2_NONCONT_CAT" , |
388 | .group = "CAT" , |
389 | .resource = "L2" , |
390 | .feature_check = noncont_cat_feature_check, |
391 | .run_test = noncont_cat_run_test, |
392 | }; |
393 | |