1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Memory bandwidth monitoring and allocation library |
4 | * |
5 | * Copyright (C) 2018 Intel Corporation |
6 | * |
7 | * Authors: |
8 | * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, |
9 | * Fenghua Yu <fenghua.yu@intel.com> |
10 | */ |
11 | #include "resctrl.h" |
12 | |
/* Name prefix searched for in the entries of DYN_PMU_PATH */
#define UNCORE_IMC		"uncore_imc"
/* Files, relative to an iMC PMU directory, holding the CAS count configs */
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"
/* Directory that is scanned for iMC PMUs */
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
/* Equals 64 / (1024 * 1024); raw iMC counter values are multiplied by it */
#define SCALE			0.00006103515625
/* Capacity of the per-iMC configuration table */
#define MAX_IMCS		20
/* Number of tokens extracted from one CAS count config string */
#define MAX_TOKENS		5
/* Indices of the read and write counter of one iMC */
#define READ			0
#define WRITE			1

/*
 * printf() formats of the "mbm_local_bytes" files. The "CON" variants take a
 * control group name, the "MON" variants a monitor group name; all of them
 * take the resctrl mount path first and the domain ID last.
 */
#define CON_MON_MBM_LOCAL_BYTES_PATH "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define CON_MBM_LOCAL_BYTES_PATH "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MON_MBM_LOCAL_BYTES_PATH "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MBM_LOCAL_BYTES_PATH "%s/mon_data/mon_L3_%02d/mbm_local_bytes"

/* Same layout as above, for the "llc_occupancy" files */
#define CON_MON_LCC_OCCUP_PATH "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define CON_LCC_OCCUP_PATH "%s/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define MON_LCC_OCCUP_PATH "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define LCC_OCCUP_PATH "%s/mon_data/mon_L3_%02d/llc_occupancy"
45 | |
/*
 * Buffer that read() on a perf event file descriptor is stored into.
 * The field order must not change.
 */
struct membw_read_format {
	/* The value of the event */
	__u64 value;
	/* Present if PERF_FORMAT_TOTAL_TIME_ENABLED is requested */
	__u64 time_enabled;
	/* Present if PERF_FORMAT_TOTAL_TIME_RUNNING is requested */
	__u64 time_running;
	/* Present if PERF_FORMAT_ID is requested */
	__u64 id;
};
52 | |
53 | struct imc_counter_config { |
54 | __u32 type; |
55 | __u64 event; |
56 | __u64 umask; |
57 | struct perf_event_attr pe; |
58 | struct membw_read_format return_value; |
59 | int fd; |
60 | }; |
61 | |
62 | static char mbm_total_path[1024]; |
63 | static int imcs; |
64 | static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; |
65 | |
66 | void membw_initialize_perf_event_attr(int i, int j) |
67 | { |
68 | memset(&imc_counters_config[i][j].pe, 0, |
69 | sizeof(struct perf_event_attr)); |
70 | imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type; |
71 | imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr); |
72 | imc_counters_config[i][j].pe.disabled = 1; |
73 | imc_counters_config[i][j].pe.inherit = 1; |
74 | imc_counters_config[i][j].pe.exclude_guest = 0; |
75 | imc_counters_config[i][j].pe.config = |
76 | imc_counters_config[i][j].umask << 8 | |
77 | imc_counters_config[i][j].event; |
78 | imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER; |
79 | imc_counters_config[i][j].pe.read_format = |
80 | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
81 | } |
82 | |
83 | void membw_ioctl_perf_event_ioc_reset_enable(int i, int j) |
84 | { |
85 | ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0); |
86 | ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0); |
87 | } |
88 | |
89 | void membw_ioctl_perf_event_ioc_disable(int i, int j) |
90 | { |
91 | ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0); |
92 | } |
93 | |
94 | /* |
95 | * get_event_and_umask: Parse config into event and umask |
96 | * @cas_count_cfg: Config |
97 | * @count: iMC number |
98 | * @op: Operation (read/write) |
99 | */ |
100 | void get_event_and_umask(char *cas_count_cfg, int count, bool op) |
101 | { |
102 | char *token[MAX_TOKENS]; |
103 | int i = 0; |
104 | |
105 | strcat(p: cas_count_cfg, q: "," ); |
106 | token[0] = strtok(cas_count_cfg, "=," ); |
107 | |
108 | for (i = 1; i < MAX_TOKENS; i++) |
109 | token[i] = strtok(NULL, "=," ); |
110 | |
111 | for (i = 0; i < MAX_TOKENS; i++) { |
112 | if (!token[i]) |
113 | break; |
114 | if (strcmp(token[i], "event" ) == 0) { |
115 | if (op == READ) |
116 | imc_counters_config[count][READ].event = |
117 | strtol(token[i + 1], NULL, 16); |
118 | else |
119 | imc_counters_config[count][WRITE].event = |
120 | strtol(token[i + 1], NULL, 16); |
121 | } |
122 | if (strcmp(token[i], "umask" ) == 0) { |
123 | if (op == READ) |
124 | imc_counters_config[count][READ].umask = |
125 | strtol(token[i + 1], NULL, 16); |
126 | else |
127 | imc_counters_config[count][WRITE].umask = |
128 | strtol(token[i + 1], NULL, 16); |
129 | } |
130 | } |
131 | } |
132 | |
133 | static int open_perf_event(int i, int cpu_no, int j) |
134 | { |
135 | imc_counters_config[i][j].fd = |
136 | perf_event_open(hw_event: &imc_counters_config[i][j].pe, pid: -1, cpu: cpu_no, group_fd: -1, |
137 | PERF_FLAG_FD_CLOEXEC); |
138 | |
139 | if (imc_counters_config[i][j].fd == -1) { |
140 | fprintf(stderr, "Error opening leader %llx\n" , |
141 | imc_counters_config[i][j].pe.config); |
142 | |
143 | return -1; |
144 | } |
145 | |
146 | return 0; |
147 | } |
148 | |
149 | /* Get type and config (read and write) of an iMC counter */ |
150 | static int read_from_imc_dir(char *imc_dir, int count) |
151 | { |
152 | char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; |
153 | FILE *fp; |
154 | |
155 | /* Get type of iMC counter */ |
156 | sprintf(buf: imc_counter_type, fmt: "%s%s" , imc_dir, "type" ); |
157 | fp = fopen(imc_counter_type, "r" ); |
158 | if (!fp) { |
159 | ksft_perror(msg: "Failed to open iMC counter type file" ); |
160 | |
161 | return -1; |
162 | } |
163 | if (fscanf(fp, "%u" , &imc_counters_config[count][READ].type) <= 0) { |
164 | ksft_perror(msg: "Could not get iMC type" ); |
165 | fclose(fp); |
166 | |
167 | return -1; |
168 | } |
169 | fclose(fp); |
170 | |
171 | imc_counters_config[count][WRITE].type = |
172 | imc_counters_config[count][READ].type; |
173 | |
174 | /* Get read config */ |
175 | sprintf(buf: imc_counter_cfg, fmt: "%s%s" , imc_dir, READ_FILE_NAME); |
176 | fp = fopen(imc_counter_cfg, "r" ); |
177 | if (!fp) { |
178 | ksft_perror(msg: "Failed to open iMC config file" ); |
179 | |
180 | return -1; |
181 | } |
182 | if (fscanf(fp, "%s" , cas_count_cfg) <= 0) { |
183 | ksft_perror(msg: "Could not get iMC cas count read" ); |
184 | fclose(fp); |
185 | |
186 | return -1; |
187 | } |
188 | fclose(fp); |
189 | |
190 | get_event_and_umask(cas_count_cfg, count, READ); |
191 | |
192 | /* Get write config */ |
193 | sprintf(buf: imc_counter_cfg, fmt: "%s%s" , imc_dir, WRITE_FILE_NAME); |
194 | fp = fopen(imc_counter_cfg, "r" ); |
195 | if (!fp) { |
196 | ksft_perror(msg: "Failed to open iMC config file" ); |
197 | |
198 | return -1; |
199 | } |
200 | if (fscanf(fp, "%s" , cas_count_cfg) <= 0) { |
201 | ksft_perror(msg: "Could not get iMC cas count write" ); |
202 | fclose(fp); |
203 | |
204 | return -1; |
205 | } |
206 | fclose(fp); |
207 | |
208 | get_event_and_umask(cas_count_cfg, count, WRITE); |
209 | |
210 | return 0; |
211 | } |
212 | |
213 | /* |
214 | * A system can have 'n' number of iMC (Integrated Memory Controller) |
215 | * counters, get that 'n'. For each iMC counter get it's type and config. |
216 | * Also, each counter has two configs, one for read and the other for write. |
217 | * A config again has two parts, event and umask. |
218 | * Enumerate all these details into an array of structures. |
219 | * |
220 | * Return: >= 0 on success. < 0 on failure. |
221 | */ |
222 | static int num_of_imcs(void) |
223 | { |
224 | char imc_dir[512], *temp; |
225 | unsigned int count = 0; |
226 | struct dirent *ep; |
227 | int ret; |
228 | DIR *dp; |
229 | |
230 | dp = opendir(DYN_PMU_PATH); |
231 | if (dp) { |
232 | while ((ep = readdir(dp))) { |
233 | temp = strstr(ep->d_name, UNCORE_IMC); |
234 | if (!temp) |
235 | continue; |
236 | |
237 | /* |
238 | * imc counters are named as "uncore_imc_<n>", hence |
239 | * increment the pointer to point to <n>. Note that |
240 | * sizeof(UNCORE_IMC) would count for null character as |
241 | * well and hence the last underscore character in |
242 | * uncore_imc'_' need not be counted. |
243 | */ |
244 | temp = temp + sizeof(UNCORE_IMC); |
245 | |
246 | /* |
247 | * Some directories under "DYN_PMU_PATH" could have |
248 | * names like "uncore_imc_free_running", hence, check if |
249 | * first character is a numerical digit or not. |
250 | */ |
251 | if (temp[0] >= '0' && temp[0] <= '9') { |
252 | sprintf(buf: imc_dir, fmt: "%s/%s/" , DYN_PMU_PATH, |
253 | ep->d_name); |
254 | ret = read_from_imc_dir(imc_dir, count); |
255 | if (ret) { |
256 | closedir(dp); |
257 | |
258 | return ret; |
259 | } |
260 | count++; |
261 | } |
262 | } |
263 | closedir(dp); |
264 | if (count == 0) { |
265 | ksft_print_msg(msg: "Unable to find iMC counters\n" ); |
266 | |
267 | return -1; |
268 | } |
269 | } else { |
270 | ksft_perror(msg: "Unable to open PMU directory" ); |
271 | |
272 | return -1; |
273 | } |
274 | |
275 | return count; |
276 | } |
277 | |
278 | static int initialize_mem_bw_imc(void) |
279 | { |
280 | int imc, j; |
281 | |
282 | imcs = num_of_imcs(); |
283 | if (imcs <= 0) |
284 | return imcs; |
285 | |
286 | /* Initialize perf_event_attr structures for all iMC's */ |
287 | for (imc = 0; imc < imcs; imc++) { |
288 | for (j = 0; j < 2; j++) |
289 | membw_initialize_perf_event_attr(i: imc, j); |
290 | } |
291 | |
292 | return 0; |
293 | } |
294 | |
295 | /* |
296 | * get_mem_bw_imc: Memory band width as reported by iMC counters |
297 | * @cpu_no: CPU number that the benchmark PID is binded to |
298 | * @bw_report: Bandwidth report type (reads, writes) |
299 | * |
300 | * Memory B/W utilized by a process on a socket can be calculated using |
301 | * iMC counters. Perf events are used to read these counters. |
302 | * |
303 | * Return: = 0 on success. < 0 on failure. |
304 | */ |
305 | static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) |
306 | { |
307 | float reads, writes, of_mul_read, of_mul_write; |
308 | int imc, j, ret; |
309 | |
310 | /* Start all iMC counters to log values (both read and write) */ |
311 | reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; |
312 | for (imc = 0; imc < imcs; imc++) { |
313 | for (j = 0; j < 2; j++) { |
314 | ret = open_perf_event(i: imc, cpu_no, j); |
315 | if (ret) |
316 | return -1; |
317 | } |
318 | for (j = 0; j < 2; j++) |
319 | membw_ioctl_perf_event_ioc_reset_enable(i: imc, j); |
320 | } |
321 | |
322 | sleep(1); |
323 | |
324 | /* Stop counters after a second to get results (both read and write) */ |
325 | for (imc = 0; imc < imcs; imc++) { |
326 | for (j = 0; j < 2; j++) |
327 | membw_ioctl_perf_event_ioc_disable(i: imc, j); |
328 | } |
329 | |
330 | /* |
331 | * Get results which are stored in struct type imc_counter_config |
332 | * Take over flow into consideration before calculating total b/w |
333 | */ |
334 | for (imc = 0; imc < imcs; imc++) { |
335 | struct imc_counter_config *r = |
336 | &imc_counters_config[imc][READ]; |
337 | struct imc_counter_config *w = |
338 | &imc_counters_config[imc][WRITE]; |
339 | |
340 | if (read(r->fd, &r->return_value, |
341 | sizeof(struct membw_read_format)) == -1) { |
342 | ksft_perror(msg: "Couldn't get read b/w through iMC" ); |
343 | |
344 | return -1; |
345 | } |
346 | |
347 | if (read(w->fd, &w->return_value, |
348 | sizeof(struct membw_read_format)) == -1) { |
349 | ksft_perror(msg: "Couldn't get write bw through iMC" ); |
350 | |
351 | return -1; |
352 | } |
353 | |
354 | __u64 r_time_enabled = r->return_value.time_enabled; |
355 | __u64 r_time_running = r->return_value.time_running; |
356 | |
357 | if (r_time_enabled != r_time_running) |
358 | of_mul_read = (float)r_time_enabled / |
359 | (float)r_time_running; |
360 | |
361 | __u64 w_time_enabled = w->return_value.time_enabled; |
362 | __u64 w_time_running = w->return_value.time_running; |
363 | |
364 | if (w_time_enabled != w_time_running) |
365 | of_mul_write = (float)w_time_enabled / |
366 | (float)w_time_running; |
367 | reads += r->return_value.value * of_mul_read * SCALE; |
368 | writes += w->return_value.value * of_mul_write * SCALE; |
369 | } |
370 | |
371 | for (imc = 0; imc < imcs; imc++) { |
372 | close(imc_counters_config[imc][READ].fd); |
373 | close(imc_counters_config[imc][WRITE].fd); |
374 | } |
375 | |
376 | if (strcmp(bw_report, "reads" ) == 0) { |
377 | *bw_imc = reads; |
378 | return 0; |
379 | } |
380 | |
381 | if (strcmp(bw_report, "writes" ) == 0) { |
382 | *bw_imc = writes; |
383 | return 0; |
384 | } |
385 | |
386 | *bw_imc = reads + writes; |
387 | return 0; |
388 | } |
389 | |
390 | void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id) |
391 | { |
392 | if (ctrlgrp && mongrp) |
393 | sprintf(buf: mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH, |
394 | RESCTRL_PATH, ctrlgrp, mongrp, domain_id); |
395 | else if (!ctrlgrp && mongrp) |
396 | sprintf(buf: mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, |
397 | mongrp, domain_id); |
398 | else if (ctrlgrp && !mongrp) |
399 | sprintf(buf: mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, |
400 | ctrlgrp, domain_id); |
401 | else if (!ctrlgrp && !mongrp) |
402 | sprintf(buf: mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, |
403 | domain_id); |
404 | } |
405 | |
406 | /* |
407 | * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" |
408 | * @ctrlgrp: Name of the control monitor group (con_mon grp) |
409 | * @mongrp: Name of the monitor group (mon grp) |
410 | * @cpu_no: CPU number that the benchmark PID is binded to |
411 | * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) |
412 | */ |
413 | static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, |
414 | int cpu_no, char *resctrl_val) |
415 | { |
416 | int domain_id; |
417 | |
418 | if (get_domain_id(resource: "MB" , cpu_no, domain_id: &domain_id) < 0) { |
419 | ksft_print_msg(msg: "Could not get domain ID\n" ); |
420 | return; |
421 | } |
422 | |
423 | if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) |
424 | set_mbm_path(ctrlgrp, mongrp, domain_id); |
425 | |
426 | if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { |
427 | if (ctrlgrp) |
428 | sprintf(buf: mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, |
429 | RESCTRL_PATH, ctrlgrp, domain_id); |
430 | else |
431 | sprintf(buf: mbm_total_path, MBM_LOCAL_BYTES_PATH, |
432 | RESCTRL_PATH, domain_id); |
433 | } |
434 | } |
435 | |
436 | /* |
437 | * Get MBM Local bytes as reported by resctrl FS |
438 | * For MBM, |
439 | * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp |
440 | * 2. If only con_mon grp is given, then read from con_mon grp |
441 | * 3. If both are not given, then read from root con_mon grp |
442 | * For MBA, |
443 | * 1. If con_mon grp is given, then read from it |
444 | * 2. If con_mon grp is not given, then read from root con_mon grp |
445 | */ |
446 | static int get_mem_bw_resctrl(unsigned long *mbm_total) |
447 | { |
448 | FILE *fp; |
449 | |
450 | fp = fopen(mbm_total_path, "r" ); |
451 | if (!fp) { |
452 | ksft_perror(msg: "Failed to open total bw file" ); |
453 | |
454 | return -1; |
455 | } |
456 | if (fscanf(fp, "%lu" , mbm_total) <= 0) { |
457 | ksft_perror(msg: "Could not get mbm local bytes" ); |
458 | fclose(fp); |
459 | |
460 | return -1; |
461 | } |
462 | fclose(fp); |
463 | |
464 | return 0; |
465 | } |
466 | |
/* PID of the benchmark child, as returned by fork() in resctrl_val() */
pid_t bm_pid;
/* PID of the process running resctrl_val(), saved before the fork() */
pid_t ppid;
468 | |
469 | void ctrlc_handler(int signum, siginfo_t *info, void *ptr) |
470 | { |
471 | /* Only kill child after bm_pid is set after fork() */ |
472 | if (bm_pid) |
473 | kill(bm_pid, SIGKILL); |
474 | umount_resctrlfs(); |
475 | tests_cleanup(); |
476 | ksft_print_msg(msg: "Ending\n\n" ); |
477 | |
478 | exit(EXIT_SUCCESS); |
479 | } |
480 | |
481 | /* |
482 | * Register CTRL-C handler for parent, as it has to kill |
483 | * child process before exiting. |
484 | */ |
485 | int signal_handler_register(void) |
486 | { |
487 | struct sigaction sigact = {}; |
488 | int ret = 0; |
489 | |
490 | bm_pid = 0; |
491 | |
492 | sigact.sa_sigaction = ctrlc_handler; |
493 | sigemptyset(set: &sigact.sa_mask); |
494 | sigact.sa_flags = SA_SIGINFO; |
495 | if (sigaction(SIGINT, &sigact, NULL) || |
496 | sigaction(SIGTERM, &sigact, NULL) || |
497 | sigaction(SIGHUP, &sigact, NULL)) { |
498 | ksft_perror(msg: "sigaction" ); |
499 | ret = -1; |
500 | } |
501 | return ret; |
502 | } |
503 | |
504 | /* |
505 | * Reset signal handler to SIG_DFL. |
506 | * Non-Value return because the caller should keep |
507 | * the error code of other path even if sigaction fails. |
508 | */ |
509 | void signal_handler_unregister(void) |
510 | { |
511 | struct sigaction sigact = {}; |
512 | |
513 | sigact.sa_handler = SIG_DFL; |
514 | sigemptyset(set: &sigact.sa_mask); |
515 | if (sigaction(SIGINT, &sigact, NULL) || |
516 | sigaction(SIGTERM, &sigact, NULL) || |
517 | sigaction(SIGHUP, &sigact, NULL)) { |
518 | ksft_perror(msg: "sigaction" ); |
519 | } |
520 | } |
521 | |
/*
 * print_results_bw:	Log one memory bandwidth measurement
 * @filename:		File the result is appended to; "stdio" and "stderr"
 *			both make the result go to stdout instead
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Return:		0 on success, < 0 on error.
 */
static int print_results_bw(char *filename, int bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	bool to_console;
	int written;
	FILE *fp;

	to_console = !strcmp(filename, "stdio") || !strcmp(filename, "stderr");
	if (to_console) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	fp = fopen(filename, "a");
	if (!fp) {
		ksft_perror("Cannot open results file");

		return -1;
	}

	written = fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
			  bm_pid, bw_imc, bw_resc, diff);
	if (written <= 0) {
		ksft_print_msg("Could not log results\n");
		fclose(fp);

		return -1;
	}

	fclose(fp);

	return 0;
}
559 | |
560 | static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num) |
561 | { |
562 | if (strlen(ctrlgrp) && strlen(mongrp)) |
563 | sprintf(buf: llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, |
564 | ctrlgrp, mongrp, sock_num); |
565 | else if (!strlen(ctrlgrp) && strlen(mongrp)) |
566 | sprintf(buf: llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH, |
567 | mongrp, sock_num); |
568 | else if (strlen(ctrlgrp) && !strlen(mongrp)) |
569 | sprintf(buf: llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH, |
570 | ctrlgrp, sock_num); |
571 | else if (!strlen(ctrlgrp) && !strlen(mongrp)) |
572 | sprintf(buf: llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num); |
573 | } |
574 | |
575 | /* |
576 | * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path" |
577 | * @ctrlgrp: Name of the control monitor group (con_mon grp) |
578 | * @mongrp: Name of the monitor group (mon grp) |
579 | * @cpu_no: CPU number that the benchmark PID is binded to |
580 | * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc) |
581 | */ |
582 | static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, |
583 | int cpu_no, char *resctrl_val) |
584 | { |
585 | int domain_id; |
586 | |
587 | if (get_domain_id(resource: "L3" , cpu_no, domain_id: &domain_id) < 0) { |
588 | ksft_print_msg(msg: "Could not get domain ID\n" ); |
589 | return; |
590 | } |
591 | |
592 | if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) |
593 | set_cmt_path(ctrlgrp, mongrp, sock_num: domain_id); |
594 | } |
595 | |
596 | static int measure_vals(const struct user_params *uparams, |
597 | struct resctrl_val_param *param, |
598 | unsigned long *bw_resc_start) |
599 | { |
600 | unsigned long bw_resc, bw_resc_end; |
601 | float bw_imc; |
602 | int ret; |
603 | |
604 | /* |
605 | * Measure memory bandwidth from resctrl and from |
606 | * another source which is perf imc value or could |
607 | * be something else if perf imc event is not available. |
608 | * Compare the two values to validate resctrl value. |
609 | * It takes 1sec to measure the data. |
610 | */ |
611 | ret = get_mem_bw_imc(cpu_no: uparams->cpu, bw_report: param->bw_report, bw_imc: &bw_imc); |
612 | if (ret < 0) |
613 | return ret; |
614 | |
615 | ret = get_mem_bw_resctrl(mbm_total: &bw_resc_end); |
616 | if (ret < 0) |
617 | return ret; |
618 | |
619 | bw_resc = (bw_resc_end - *bw_resc_start) / MB; |
620 | ret = print_results_bw(filename: param->filename, bm_pid, bw_imc, bw_resc); |
621 | if (ret) |
622 | return ret; |
623 | |
624 | *bw_resc_start = bw_resc_end; |
625 | |
626 | return 0; |
627 | } |
628 | |
629 | /* |
630 | * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) |
631 | * in specified signal. Direct benchmark stdio to /dev/null. |
632 | * @signum: signal number |
633 | * @info: signal info |
634 | * @ucontext: user context in signal handling |
635 | */ |
636 | static void run_benchmark(int signum, siginfo_t *info, void *ucontext) |
637 | { |
638 | int operation, ret, memflush; |
639 | char **benchmark_cmd; |
640 | size_t span; |
641 | bool once; |
642 | FILE *fp; |
643 | |
644 | benchmark_cmd = info->si_ptr; |
645 | |
646 | /* |
647 | * Direct stdio of child to /dev/null, so that only parent writes to |
648 | * stdio (console) |
649 | */ |
650 | fp = freopen("/dev/null" , "w" , stdout); |
651 | if (!fp) { |
652 | ksft_perror(msg: "Unable to direct benchmark status to /dev/null" ); |
653 | PARENT_EXIT(); |
654 | } |
655 | |
656 | if (strcmp(benchmark_cmd[0], "fill_buf" ) == 0) { |
657 | /* Execute default fill_buf benchmark */ |
658 | span = strtoul(benchmark_cmd[1], NULL, 10); |
659 | memflush = atoi(benchmark_cmd[2]); |
660 | operation = atoi(benchmark_cmd[3]); |
661 | if (!strcmp(benchmark_cmd[4], "true" )) { |
662 | once = true; |
663 | } else if (!strcmp(benchmark_cmd[4], "false" )) { |
664 | once = false; |
665 | } else { |
666 | ksft_print_msg(msg: "Invalid once parameter\n" ); |
667 | PARENT_EXIT(); |
668 | } |
669 | |
670 | if (run_fill_buf(span, memflush, operation, once)) |
671 | fprintf(stderr, "Error in running fill buffer\n" ); |
672 | } else { |
673 | /* Execute specified benchmark */ |
674 | ret = execvp(benchmark_cmd[0], benchmark_cmd); |
675 | if (ret) |
676 | ksft_perror(msg: "execvp" ); |
677 | } |
678 | |
679 | fclose(stdout); |
680 | ksft_print_msg(msg: "Unable to run specified benchmark\n" ); |
681 | PARENT_EXIT(); |
682 | } |
683 | |
684 | /* |
685 | * resctrl_val: execute benchmark and measure memory bandwidth on |
686 | * the benchmark |
687 | * @test: test information structure |
688 | * @uparams: user supplied parameters |
689 | * @benchmark_cmd: benchmark command and its arguments |
690 | * @param: parameters passed to resctrl_val() |
691 | * |
692 | * Return: 0 when the test was run, < 0 on error. |
693 | */ |
694 | int resctrl_val(const struct resctrl_test *test, |
695 | const struct user_params *uparams, |
696 | const char * const *benchmark_cmd, |
697 | struct resctrl_val_param *param) |
698 | { |
699 | char *resctrl_val = param->resctrl_val; |
700 | unsigned long bw_resc_start = 0; |
701 | struct sigaction sigact; |
702 | int ret = 0, pipefd[2]; |
703 | char pipe_message = 0; |
704 | union sigval value; |
705 | |
706 | if (strcmp(param->filename, "" ) == 0) |
707 | sprintf(buf: param->filename, fmt: "stdio" ); |
708 | |
709 | if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || |
710 | !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { |
711 | ret = validate_bw_report_request(bw_report: param->bw_report); |
712 | if (ret) |
713 | return ret; |
714 | } |
715 | |
716 | /* |
717 | * If benchmark wasn't successfully started by child, then child should |
718 | * kill parent, so save parent's pid |
719 | */ |
720 | ppid = getpid(); |
721 | |
722 | if (pipe(pipefd)) { |
723 | ksft_perror(msg: "Unable to create pipe" ); |
724 | |
725 | return -1; |
726 | } |
727 | |
728 | /* |
729 | * Fork to start benchmark, save child's pid so that it can be killed |
730 | * when needed |
731 | */ |
732 | fflush(stdout); |
733 | bm_pid = fork(); |
734 | if (bm_pid == -1) { |
735 | ksft_perror(msg: "Unable to fork" ); |
736 | |
737 | return -1; |
738 | } |
739 | |
740 | if (bm_pid == 0) { |
741 | /* |
742 | * Mask all signals except SIGUSR1, parent uses SIGUSR1 to |
743 | * start benchmark |
744 | */ |
745 | sigfillset(set: &sigact.sa_mask); |
746 | sigdelset(set: &sigact.sa_mask, SIGUSR1); |
747 | |
748 | sigact.sa_sigaction = run_benchmark; |
749 | sigact.sa_flags = SA_SIGINFO; |
750 | |
751 | /* Register for "SIGUSR1" signal from parent */ |
752 | if (sigaction(SIGUSR1, &sigact, NULL)) { |
753 | ksft_perror(msg: "Can't register child for signal" ); |
754 | PARENT_EXIT(); |
755 | } |
756 | |
757 | /* Tell parent that child is ready */ |
758 | close(pipefd[0]); |
759 | pipe_message = 1; |
760 | if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < |
761 | sizeof(pipe_message)) { |
762 | ksft_perror(msg: "Failed signaling parent process" ); |
763 | close(pipefd[1]); |
764 | return -1; |
765 | } |
766 | close(pipefd[1]); |
767 | |
768 | /* Suspend child until delivery of "SIGUSR1" from parent */ |
769 | sigsuspend(&sigact.sa_mask); |
770 | |
771 | ksft_perror(msg: "Child is done" ); |
772 | PARENT_EXIT(); |
773 | } |
774 | |
775 | ksft_print_msg(msg: "Benchmark PID: %d\n" , bm_pid); |
776 | |
777 | /* |
778 | * The cast removes constness but nothing mutates benchmark_cmd within |
779 | * the context of this process. At the receiving process, it becomes |
780 | * argv, which is mutable, on exec() but that's after fork() so it |
781 | * doesn't matter for the process running the tests. |
782 | */ |
783 | value.sival_ptr = (void *)benchmark_cmd; |
784 | |
785 | /* Taskset benchmark to specified cpu */ |
786 | ret = taskset_benchmark(bm_pid, uparams->cpu, NULL); |
787 | if (ret) |
788 | goto out; |
789 | |
790 | /* Write benchmark to specified control&monitoring grp in resctrl FS */ |
791 | ret = write_bm_pid_to_resctrl(bm_pid, ctrlgrp: param->ctrlgrp, mongrp: param->mongrp, |
792 | resctrl_val); |
793 | if (ret) |
794 | goto out; |
795 | |
796 | if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || |
797 | !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { |
798 | ret = initialize_mem_bw_imc(); |
799 | if (ret) |
800 | goto out; |
801 | |
802 | initialize_mem_bw_resctrl(ctrlgrp: param->ctrlgrp, mongrp: param->mongrp, |
803 | cpu_no: uparams->cpu, resctrl_val); |
804 | } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) |
805 | initialize_llc_occu_resctrl(ctrlgrp: param->ctrlgrp, mongrp: param->mongrp, |
806 | cpu_no: uparams->cpu, resctrl_val); |
807 | |
808 | /* Parent waits for child to be ready. */ |
809 | close(pipefd[1]); |
810 | while (pipe_message != 1) { |
811 | if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) < |
812 | sizeof(pipe_message)) { |
813 | ksft_perror(msg: "Failed reading message from child process" ); |
814 | close(pipefd[0]); |
815 | goto out; |
816 | } |
817 | } |
818 | close(pipefd[0]); |
819 | |
820 | /* Signal child to start benchmark */ |
821 | if (sigqueue(bm_pid, SIGUSR1, value) == -1) { |
822 | ksft_perror(msg: "sigqueue SIGUSR1 to child" ); |
823 | ret = -1; |
824 | goto out; |
825 | } |
826 | |
827 | /* Give benchmark enough time to fully run */ |
828 | sleep(1); |
829 | |
830 | /* Test runs until the callback setup() tells the test to stop. */ |
831 | while (1) { |
832 | ret = param->setup(test, uparams, param); |
833 | if (ret == END_OF_TESTS) { |
834 | ret = 0; |
835 | break; |
836 | } |
837 | if (ret < 0) |
838 | break; |
839 | |
840 | if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || |
841 | !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { |
842 | ret = measure_vals(uparams, param, bw_resc_start: &bw_resc_start); |
843 | if (ret) |
844 | break; |
845 | } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { |
846 | sleep(1); |
847 | ret = measure_llc_resctrl(filename: param->filename, bm_pid); |
848 | if (ret) |
849 | break; |
850 | } |
851 | } |
852 | |
853 | out: |
854 | kill(bm_pid, SIGKILL); |
855 | |
856 | return ret; |
857 | } |
858 | |