1 | // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) |
2 | /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ |
3 | #define _GNU_SOURCE |
4 | #include <argp.h> |
5 | #include <string.h> |
6 | #include <stdlib.h> |
7 | #include <sched.h> |
8 | #include <pthread.h> |
9 | #include <dirent.h> |
10 | #include <signal.h> |
11 | #include <fcntl.h> |
12 | #include <unistd.h> |
13 | #include <sys/time.h> |
14 | #include <sys/sysinfo.h> |
15 | #include <sys/stat.h> |
16 | #include <bpf/libbpf.h> |
17 | #include <bpf/btf.h> |
18 | #include <libelf.h> |
19 | #include <gelf.h> |
20 | #include <float.h> |
21 | #include <math.h> |
22 | |
/* Element count of a true array (must not be used on a pointer). Defined
 * only when no earlier header has provided it already.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
26 | |
/* Identifiers of all stats known to the tool.
 *
 * Integer-valued stats come first, so that they can be used directly as
 * indices into verif_stats.stats[]; string-valued stats (file and program
 * name) follow them.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	/* non-integer stats start here */
	FILE_NAME,
	PROG_NAME,

	/* total number of stat IDs */
	ALL_STATS_CNT,
	/* number of stat IDs that precede FILE_NAME, i.e. the integer ones */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
42 | |
43 | /* In comparison mode each stat can specify up to four different values: |
44 | * - A side value; |
45 | * - B side value; |
46 | * - absolute diff value; |
47 | * - relative (percentage) diff value. |
48 | * |
49 | * When specifying stat specs in comparison mode, user can use one of the |
50 | * following variant suffixes to specify which exact variant should be used for |
51 | * ordering or filtering: |
52 | * - `_a` for A side value; |
53 | * - `_b` for B side value; |
54 | * - `_diff` for absolute diff value; |
55 | * - `_pct` for relative (percentage) diff value. |
56 | * |
57 | * If no variant suffix is provided, then `_b` (control data) is assumed. |
58 | * |
59 | * As an example, let's say instructions stat has the following output: |
60 | * |
61 | * Insns (A) Insns (B) Insns (DIFF) |
62 | * --------- --------- -------------- |
63 | * 21547 20920 -627 (-2.91%) |
64 | * |
65 | * Then: |
66 | * - 21547 is A side value (insns_a); |
67 | * - 20920 is B side value (insns_b); |
68 | * - -627 is absolute diff value (insns_diff); |
69 | * - -2.91% is relative diff value (insns_pct). |
70 | * |
71 | * For verdict there is no verdict_pct variant. |
72 | * For file and program name, _a and _b variants are equivalent and there are |
73 | * no _diff or _pct variants. |
74 | */ |
enum stat_variant {
	VARIANT_A,	/* `_a` suffix */
	VARIANT_B,	/* `_b` suffix; also used when no suffix is given */
	VARIANT_DIFF,	/* `_diff` suffix */
	VARIANT_PCT,	/* `_pct` suffix */
};
81 | |
/* Verification stats collected for a single BPF program */
struct verif_stats {
	/* heap-allocated strings, released by free_verif_stats() */
	char *file_name;
	char *prog_name;

	/* integer-valued stats, indexed by the leading enum stat_id values */
	long stats[NUM_STATS_CNT];
};
88 | |
/* joined comparison mode stats */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* presumably the A-side and B-side entries being compared; how they
	 * are populated is not visible in this part of the file
	 */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
97 | |
/* An ordered list of stat specifiers (see parse_stat()).
 *
 * Specifier number i is described by ids[i], variants[i], asc[i] and abs[i];
 * spec_cnt is the number of specifiers currently in use.
 */
struct stat_specs {
	int spec_cnt;
	enum stat_id ids[ALL_STATS_CNT];
	enum stat_variant variants[ALL_STATS_CNT];
	/* true if ascending order applies to this specifier */
	bool asc[ALL_STATS_CNT];
	/* true if the stat was written as |<stat>| (absolute value) */
	bool abs[ALL_STATS_CNT];
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
106 | |
/* Result output formats; table and csv are selectable with -o */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
112 | |
/* Kinds of filters produced by append_filter() */
enum filter_kind {
	FILTER_NAME,	/* file and/or program name glob */
	FILTER_STAT,	/* <stat><op><value> comparison */
};
117 | |
/* Comparison operators accepted in stat filters; the textual spellings are
 * listed in the operators[] table.
 */
enum operator_kind {
	OP_EQ, /* == or = */
	OP_NEQ, /* != or <> */
	OP_LT, /* < */
	OP_LE, /* <= */
	OP_GT, /* > */
	OP_GE, /* >= */
};
126 | |
/* A single allow or deny filter as parsed by append_filter(). Depending on
 * kind, either the glob fields or the stat comparison fields are meaningful.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* '<glob>' form: applies to file and program name */
	char *file_glob;	/* file part of '<file-glob>/<prog-glob>', or NULL */
	char *prog_glob;	/* program part of '<file-glob>/<prog-glob>', or NULL */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;		/* enum stat_id value; always below FILE_NAME */
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	/* NOTE(review): presumably selects comparison against the absolute
	 * value of the stat; the code that evaluates it is not visible here
	 */
	bool abs;
};
140 | |
/* Global tool state: command-line settings filled in by parse_arg() plus the
 * data accumulated while processing programs.
 */
static struct env {
	/* positional arguments (heap-allocated copies) */
	char **filenames;
	int filename_cnt;
	bool verbose;			/* -v, also implied by -d */
	bool debug;			/* -d */
	bool quiet;			/* -q */
	bool force_checkpoints;		/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;		/* -o */
	bool show_version;		/* -V */
	bool comparison_mode;		/* -C */
	bool replay_mode;		/* -R */
	int top_n;			/* -n */

	/* verifier log settings (-l, --log-size, --log-fixed) */
	int log_level;
	int log_size;
	bool log_fixed;

	/* one entry per processed program; grown by process_prog() */
	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f filters; expressions prefixed with '!' go to deny_filters */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	/* processing counters */
	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
} env;
182 | |
183 | static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) |
184 | { |
185 | if (!env.verbose) |
186 | return 0; |
187 | if (level == LIBBPF_DEBUG && !env.debug) |
188 | return 0; |
189 | return vfprintf(stderr, format, args); |
190 | } |
191 | |
/* Version text embedded into argp_program_version; a build may predefine
 * VERISTAT_VERSION, otherwise the placeholder below is used.
 */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Version, bug-report address and usage text of the tool */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
" OR: veristat -C <baseline.csv> <comparison.csv>\n"
" OR: veristat -R <results.csv>\n";
204 | |
/* Keys of options that have only a long form in opts[]; the values do not
 * collide with the character keys used by the other options.
 */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
209 | |
/* Command-line option table; every key listed here is handled in
 * parse_arg(). The empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{},
};
232 | |
233 | static int parse_stats(const char *stats_str, struct stat_specs *specs); |
234 | static int append_filter(struct filter **filters, int *cnt, const char *str); |
235 | static int append_filter_file(const char *path); |
236 | |
237 | static error_t parse_arg(int key, char *arg, struct argp_state *state) |
238 | { |
239 | void *tmp; |
240 | int err; |
241 | |
242 | switch (key) { |
243 | case 'h': |
244 | argp_state_help(state, stderr, ARGP_HELP_STD_HELP); |
245 | break; |
246 | case 'V': |
247 | env.show_version = true; |
248 | break; |
249 | case 'v': |
250 | env.verbose = true; |
251 | break; |
252 | case 'd': |
253 | env.debug = true; |
254 | env.verbose = true; |
255 | break; |
256 | case 'q': |
257 | env.quiet = true; |
258 | break; |
259 | case 'e': |
260 | err = parse_stats(stats_str: arg, specs: &env.output_spec); |
261 | if (err) |
262 | return err; |
263 | break; |
264 | case 's': |
265 | err = parse_stats(stats_str: arg, specs: &env.sort_spec); |
266 | if (err) |
267 | return err; |
268 | break; |
269 | case 'o': |
270 | if (strcmp(arg, "table" ) == 0) { |
271 | env.out_fmt = RESFMT_TABLE; |
272 | } else if (strcmp(arg, "csv" ) == 0) { |
273 | env.out_fmt = RESFMT_CSV; |
274 | } else { |
275 | fprintf(stderr, "Unrecognized output format '%s'\n" , arg); |
276 | return -EINVAL; |
277 | } |
278 | break; |
279 | case 'l': |
280 | errno = 0; |
281 | env.log_level = strtol(arg, NULL, 10); |
282 | if (errno) { |
283 | fprintf(stderr, "invalid log level: %s\n" , arg); |
284 | argp_usage(state); |
285 | } |
286 | break; |
287 | case OPT_LOG_FIXED: |
288 | env.log_fixed = true; |
289 | break; |
290 | case OPT_LOG_SIZE: |
291 | errno = 0; |
292 | env.log_size = strtol(arg, NULL, 10); |
293 | if (errno) { |
294 | fprintf(stderr, "invalid log size: %s\n" , arg); |
295 | argp_usage(state); |
296 | } |
297 | break; |
298 | case 't': |
299 | env.force_checkpoints = true; |
300 | break; |
301 | case 'r': |
302 | env.force_reg_invariants = true; |
303 | break; |
304 | case 'n': |
305 | errno = 0; |
306 | env.top_n = strtol(arg, NULL, 10); |
307 | if (errno) { |
308 | fprintf(stderr, "invalid top N specifier: %s\n" , arg); |
309 | argp_usage(state); |
310 | } |
311 | case 'C': |
312 | env.comparison_mode = true; |
313 | break; |
314 | case 'R': |
315 | env.replay_mode = true; |
316 | break; |
317 | case 'f': |
318 | if (arg[0] == '@') |
319 | err = append_filter_file(path: arg + 1); |
320 | else if (arg[0] == '!') |
321 | err = append_filter(filters: &env.deny_filters, cnt: &env.deny_filter_cnt, str: arg + 1); |
322 | else |
323 | err = append_filter(filters: &env.allow_filters, cnt: &env.allow_filter_cnt, str: arg); |
324 | if (err) { |
325 | fprintf(stderr, "Failed to collect program filter expressions: %d\n" , err); |
326 | return err; |
327 | } |
328 | break; |
329 | case ARGP_KEY_ARG: |
330 | tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); |
331 | if (!tmp) |
332 | return -ENOMEM; |
333 | env.filenames = tmp; |
334 | env.filenames[env.filename_cnt] = strdup(arg); |
335 | if (!env.filenames[env.filename_cnt]) |
336 | return -ENOMEM; |
337 | env.filename_cnt++; |
338 | break; |
339 | default: |
340 | return ARGP_ERR_UNKNOWN; |
341 | } |
342 | return 0; |
343 | } |
344 | |
/* argp parser description tying together the option table, the parsing
 * callback and the usage text.
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
350 | |
351 | |
/* Adapted from perf/util/string.c
 *
 * Match str against glob pattern pat, in which '*' stands for any (possibly
 * empty) sequence of characters and every other character matches only
 * itself. Returns true if the whole of str matches the whole of pat.
 */
static bool glob_matches(const char *str, const char *pat)
{
	const char *s = str;
	const char *p = pat;

	/* the literal part in front of the first wildcard has to match exactly */
	for (; *s != '\0' && *p != '\0' && *p != '*'; s++, p++) {
		if (*s != *p)
			return false;
	}

	/* no wildcard reached: it is a match only if both strings are used up */
	if (*p != '*')
		return *s == '\0' && *p == '\0';

	/* a run of consecutive wildcards is equivalent to a single one */
	do {
		p++;
	} while (*p == '*');

	/* Tail wild card matches all */
	if (*p == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *s != '\0'; s++) {
		if (glob_matches(s, p))
			return true;
	}

	return false;
}
373 | |
374 | static bool is_bpf_obj_file(const char *path) { |
375 | Elf64_Ehdr *ehdr; |
376 | int fd, err = -EINVAL; |
377 | Elf *elf = NULL; |
378 | |
379 | fd = open(path, O_RDONLY | O_CLOEXEC); |
380 | if (fd < 0) |
381 | return true; /* we'll fail later and propagate error */ |
382 | |
383 | /* ensure libelf is initialized */ |
384 | (void)elf_version(EV_CURRENT); |
385 | |
386 | elf = elf_begin(fd, ELF_C_READ, NULL); |
387 | if (!elf) |
388 | goto cleanup; |
389 | |
390 | if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64) |
391 | goto cleanup; |
392 | |
393 | ehdr = elf64_getehdr(elf); |
394 | /* Old LLVM set e_machine to EM_NONE */ |
395 | if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) |
396 | goto cleanup; |
397 | |
398 | err = 0; |
399 | cleanup: |
400 | if (elf) |
401 | elf_end(elf); |
402 | close(fd); |
403 | return err == 0; |
404 | } |
405 | |
406 | static bool should_process_file_prog(const char *filename, const char *prog_name) |
407 | { |
408 | struct filter *f; |
409 | int i, allow_cnt = 0; |
410 | |
411 | for (i = 0; i < env.deny_filter_cnt; i++) { |
412 | f = &env.deny_filters[i]; |
413 | if (f->kind != FILTER_NAME) |
414 | continue; |
415 | |
416 | if (f->any_glob && glob_matches(filename, f->any_glob)) |
417 | return false; |
418 | if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob)) |
419 | return false; |
420 | if (f->file_glob && glob_matches(filename, f->file_glob)) |
421 | return false; |
422 | if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob)) |
423 | return false; |
424 | } |
425 | |
426 | for (i = 0; i < env.allow_filter_cnt; i++) { |
427 | f = &env.allow_filters[i]; |
428 | if (f->kind != FILTER_NAME) |
429 | continue; |
430 | |
431 | allow_cnt++; |
432 | if (f->any_glob) { |
433 | if (glob_matches(filename, f->any_glob)) |
434 | return true; |
435 | /* If we don't know program name yet, any_glob filter |
436 | * has to assume that current BPF object file might be |
437 | * relevant; we'll check again later on after opening |
438 | * BPF object file, at which point program name will |
439 | * be known finally. |
440 | */ |
441 | if (!prog_name || glob_matches(prog_name, f->any_glob)) |
442 | return true; |
443 | } else { |
444 | if (f->file_glob && !glob_matches(filename, f->file_glob)) |
445 | continue; |
446 | if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob)) |
447 | continue; |
448 | return true; |
449 | } |
450 | } |
451 | |
452 | /* if there are no file/prog name allow filters, allow all progs, |
453 | * unless they are denied earlier explicitly |
454 | */ |
455 | return allow_cnt == 0; |
456 | } |
457 | |
/* Textual spellings of the comparison operators recognized in stat filters,
 * each with the operator kind it maps to. append_filter() tries the entries
 * in array order.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
475 | |
476 | static bool parse_stat_id_var(const char *name, size_t len, int *id, |
477 | enum stat_variant *var, bool *is_abs); |
478 | |
479 | static int append_filter(struct filter **filters, int *cnt, const char *str) |
480 | { |
481 | struct filter *f; |
482 | void *tmp; |
483 | const char *p; |
484 | int i; |
485 | |
486 | tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters)); |
487 | if (!tmp) |
488 | return -ENOMEM; |
489 | *filters = tmp; |
490 | |
491 | f = &(*filters)[*cnt]; |
492 | memset(f, 0, sizeof(*f)); |
493 | |
494 | /* First, let's check if it's a stats filter of the following form: |
495 | * <stat><op><value, where: |
496 | * - <stat> is one of supported numerical stats (verdict is also |
497 | * considered numerical, failure == 0, success == 1); |
498 | * - <op> is comparison operator (see `operators` definitions); |
499 | * - <value> is an integer (or failure/success, or false/true as |
500 | * special aliases for 0 and 1, respectively). |
501 | * If the form doesn't match what user provided, we assume file/prog |
502 | * glob filter. |
503 | */ |
504 | for (i = 0; i < ARRAY_SIZE(operators); i++) { |
505 | enum stat_variant var; |
506 | int id; |
507 | long val; |
508 | const char *end = str; |
509 | const char *op_str; |
510 | bool is_abs; |
511 | |
512 | op_str = operators[i].op_str; |
513 | p = strstr(str, op_str); |
514 | if (!p) |
515 | continue; |
516 | |
517 | if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { |
518 | fprintf(stderr, "Unrecognized stat name in '%s'!\n" , str); |
519 | return -EINVAL; |
520 | } |
521 | if (id >= FILE_NAME) { |
522 | fprintf(stderr, "Non-integer stat is specified in '%s'!\n" , str); |
523 | return -EINVAL; |
524 | } |
525 | |
526 | p += strlen(op_str); |
527 | |
528 | if (strcasecmp(p, "true" ) == 0 || |
529 | strcasecmp(p, "t" ) == 0 || |
530 | strcasecmp(p, "success" ) == 0 || |
531 | strcasecmp(p, "succ" ) == 0 || |
532 | strcasecmp(p, "s" ) == 0 || |
533 | strcasecmp(p, "match" ) == 0 || |
534 | strcasecmp(p, "m" ) == 0) { |
535 | val = 1; |
536 | } else if (strcasecmp(p, "false" ) == 0 || |
537 | strcasecmp(p, "f" ) == 0 || |
538 | strcasecmp(p, "failure" ) == 0 || |
539 | strcasecmp(p, "fail" ) == 0 || |
540 | strcasecmp(p, "mismatch" ) == 0 || |
541 | strcasecmp(p, "mis" ) == 0) { |
542 | val = 0; |
543 | } else { |
544 | errno = 0; |
545 | val = strtol(p, (char **)&end, 10); |
546 | if (errno || end == p || *end != '\0' ) { |
547 | fprintf(stderr, "Invalid integer value in '%s'!\n" , str); |
548 | return -EINVAL; |
549 | } |
550 | } |
551 | |
552 | f->kind = FILTER_STAT; |
553 | f->stat_id = id; |
554 | f->stat_var = var; |
555 | f->op = operators[i].op_kind; |
556 | f->abs = true; |
557 | f->value = val; |
558 | |
559 | *cnt += 1; |
560 | return 0; |
561 | } |
562 | |
563 | /* File/prog filter can be specified either as '<glob>' or |
564 | * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to |
565 | * both file and program names. This seems to be way more useful in |
566 | * practice. If user needs full control, they can use '/<prog-glob>' |
567 | * form to glob just program name, or '<file-glob>/' to glob only file |
568 | * name. But usually common <glob> seems to be the most useful and |
569 | * ergonomic way. |
570 | */ |
571 | f->kind = FILTER_NAME; |
572 | p = strchr(str, '/'); |
573 | if (!p) { |
574 | f->any_glob = strdup(str); |
575 | if (!f->any_glob) |
576 | return -ENOMEM; |
577 | } else { |
578 | if (str != p) { |
579 | /* non-empty file glob */ |
580 | f->file_glob = strndup(str, p - str); |
581 | if (!f->file_glob) |
582 | return -ENOMEM; |
583 | } |
584 | if (strlen(p + 1) > 0) { |
585 | /* non-empty prog glob */ |
586 | f->prog_glob = strdup(p + 1); |
587 | if (!f->prog_glob) { |
588 | free(f->file_glob); |
589 | f->file_glob = NULL; |
590 | return -ENOMEM; |
591 | } |
592 | } |
593 | } |
594 | |
595 | *cnt += 1; |
596 | return 0; |
597 | } |
598 | |
599 | static int append_filter_file(const char *path) |
600 | { |
601 | char buf[1024]; |
602 | FILE *f; |
603 | int err = 0; |
604 | |
605 | f = fopen(path, "r" ); |
606 | if (!f) { |
607 | err = -errno; |
608 | fprintf(stderr, "Failed to open filters in '%s': %d\n" , path, err); |
609 | return err; |
610 | } |
611 | |
612 | while (fscanf(f, " %1023[^\n]\n" , buf) == 1) { |
613 | /* lines starting with # are comments, skip them */ |
614 | if (buf[0] == '\0' || buf[0] == '#') |
615 | continue; |
616 | /* lines starting with ! are negative match filters */ |
617 | if (buf[0] == '!') |
618 | err = append_filter(filters: &env.deny_filters, cnt: &env.deny_filter_cnt, str: buf + 1); |
619 | else |
620 | err = append_filter(filters: &env.allow_filters, cnt: &env.allow_filter_cnt, str: buf); |
621 | if (err) |
622 | goto cleanup; |
623 | } |
624 | |
625 | cleanup: |
626 | fclose(f); |
627 | return err; |
628 | } |
629 | |
/* Default set of seven output stats: file and program name, verdict,
 * duration, instruction count, total states and peak states.
 * NOTE(review): presumably used when no -e spec is given; the place where it
 * is applied is not visible in this part of the file.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};
637 | |
/* Default output stats for CSV: the seven of default_output_spec plus
 * MAX_STATES_PER_INSN and MARK_READ_MAX_LEN, nine in total.
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 9,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
	},
};
646 | |
/* Default sort order: by file name, then by program name, both ascending */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
654 | |
/* sorting for comparison mode to join two data sets: by file name, then by
 * program name, both ascending
 */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
663 | |
664 | static struct stat_def { |
665 | const char *; |
666 | const char *names[4]; |
667 | bool asc_by_default; |
668 | bool left_aligned; |
669 | } stat_defs[] = { |
670 | [FILE_NAME] = { "File" , {"file_name" , "filename" , "file" }, true /* asc */, true /* left */ }, |
671 | [PROG_NAME] = { "Program" , {"prog_name" , "progname" , "prog" }, true /* asc */, true /* left */ }, |
672 | [VERDICT] = { "Verdict" , {"verdict" }, true /* asc: failure, success */, true /* left */ }, |
673 | [DURATION] = { "Duration (us)" , {"duration" , "dur" }, }, |
674 | [TOTAL_INSNS] = { "Insns" , {"total_insns" , "insns" }, }, |
675 | [TOTAL_STATES] = { "States" , {"total_states" , "states" }, }, |
676 | [PEAK_STATES] = { "Peak states" , {"peak_states" }, }, |
677 | [MAX_STATES_PER_INSN] = { "Max states per insn" , {"max_states_per_insn" }, }, |
678 | [MARK_READ_MAX_LEN] = { "Max mark read length" , {"max_mark_read_len" , "mark_read" }, }, |
679 | }; |
680 | |
681 | static bool parse_stat_id_var(const char *name, size_t len, int *id, |
682 | enum stat_variant *var, bool *is_abs) |
683 | { |
684 | static const char *var_sfxs[] = { |
685 | [VARIANT_A] = "_a" , |
686 | [VARIANT_B] = "_b" , |
687 | [VARIANT_DIFF] = "_diff" , |
688 | [VARIANT_PCT] = "_pct" , |
689 | }; |
690 | int i, j, k; |
691 | |
692 | /* |<stat>| means we take absolute value of given stat */ |
693 | *is_abs = false; |
694 | if (len > 2 && name[0] == '|' && name[len - 1] == '|') { |
695 | *is_abs = true; |
696 | name += 1; |
697 | len -= 2; |
698 | } |
699 | |
700 | for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { |
701 | struct stat_def *def = &stat_defs[i]; |
702 | size_t alias_len, sfx_len; |
703 | const char *alias; |
704 | |
705 | for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) { |
706 | alias = def->names[j]; |
707 | if (!alias) |
708 | continue; |
709 | |
710 | alias_len = strlen(alias); |
711 | if (strncmp(name, alias, alias_len) != 0) |
712 | continue; |
713 | |
714 | if (alias_len == len) { |
715 | /* If no variant suffix is specified, we |
716 | * assume control group (just in case we are |
717 | * in comparison mode. Variant is ignored in |
718 | * non-comparison mode. |
719 | */ |
720 | *var = VARIANT_B; |
721 | *id = i; |
722 | return true; |
723 | } |
724 | |
725 | for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) { |
726 | sfx_len = strlen(var_sfxs[k]); |
727 | if (alias_len + sfx_len != len) |
728 | continue; |
729 | |
730 | if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) { |
731 | *var = (enum stat_variant)k; |
732 | *id = i; |
733 | return true; |
734 | } |
735 | } |
736 | } |
737 | } |
738 | |
739 | return false; |
740 | } |
741 | |
/* Is c the symbol requesting ascending order ('^')? */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
746 | |
/* Is c one of the symbols requesting descending order
 * ('v', 'V', '.', '!' or '_')?
 */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
751 | |
752 | static int parse_stat(const char *stat_name, struct stat_specs *specs) |
753 | { |
754 | int id; |
755 | bool has_order = false, is_asc = false, is_abs = false; |
756 | size_t len = strlen(stat_name); |
757 | enum stat_variant var; |
758 | |
759 | if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { |
760 | fprintf(stderr, "Can't specify more than %zd stats\n" , ARRAY_SIZE(specs->ids)); |
761 | return -E2BIG; |
762 | } |
763 | |
764 | if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) { |
765 | has_order = true; |
766 | is_asc = is_asc_sym(stat_name[len - 1]); |
767 | len -= 1; |
768 | } |
769 | |
770 | if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { |
771 | fprintf(stderr, "Unrecognized stat name '%s'\n" , stat_name); |
772 | return -ESRCH; |
773 | } |
774 | |
775 | specs->ids[specs->spec_cnt] = id; |
776 | specs->variants[specs->spec_cnt] = var; |
777 | specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; |
778 | specs->abs[specs->spec_cnt] = is_abs; |
779 | specs->spec_cnt++; |
780 | |
781 | return 0; |
782 | } |
783 | |
/* Parse a comma-separated list of stat specifiers and append each of them to
 * specs by means of parse_stat().
 *
 * stats_str itself is left untouched; tokenizing is done on a private copy,
 * which is released before returning.
 *
 * Returns 0 on success, -ENOMEM if the copy can't be allocated, or the error
 * of the first specifier that fails to parse. Specifiers appended before the
 * failure stay in specs.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *input, *state = NULL, *next;
	int err = 0;

	input = strdup(stats_str);
	if (!input)
		return -ENOMEM;

	for (next = strtok_r(input, ",", &state); next; next = strtok_r(NULL, ",", &state)) {
		err = parse_stat(next, specs);
		if (err)
			break;
	}

	free(input);
	return err;
}
801 | |
802 | static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt) |
803 | { |
804 | int i; |
805 | |
806 | if (!stats) |
807 | return; |
808 | |
809 | for (i = 0; i < stat_cnt; i++) { |
810 | free(stats[i].file_name); |
811 | free(stats[i].prog_name); |
812 | } |
813 | free(stats); |
814 | } |
815 | |
/* Fixed-size verifier log buffer; process_prog() uses it when verbose mode
 * is off.
 */
static char verif_log_buf[64 * 1024];

/* Upper bound on the number of log lines parse_verif_log() looks at,
 * counted from the end of the log.
 */
#define MAX_PARSED_LOG_LINES 100
819 | |
820 | static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s) |
821 | { |
822 | const char *cur; |
823 | int pos, lines; |
824 | |
825 | buf[buf_sz - 1] = '\0'; |
826 | |
827 | for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) { |
828 | /* find previous endline or otherwise take the start of log buf */ |
829 | for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) { |
830 | } |
831 | /* next time start from end of previous line (or pos goes to <0) */ |
832 | pos--; |
833 | /* if we found endline, point right after endline symbol; |
834 | * otherwise, stay at the beginning of log buf |
835 | */ |
836 | if (cur[0] == '\n') |
837 | cur++; |
838 | |
839 | if (1 == sscanf(cur, "verification time %ld usec\n" , &s->stats[DURATION])) |
840 | continue; |
841 | if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld" , |
842 | &s->stats[TOTAL_INSNS], |
843 | &s->stats[MAX_STATES_PER_INSN], |
844 | &s->stats[TOTAL_STATES], |
845 | &s->stats[PEAK_STATES], |
846 | &s->stats[MARK_READ_MAX_LEN])) |
847 | continue; |
848 | } |
849 | |
850 | return 0; |
851 | } |
852 | |
853 | static int guess_prog_type_by_ctx_name(const char *ctx_name, |
854 | enum bpf_prog_type *prog_type, |
855 | enum bpf_attach_type *attach_type) |
856 | { |
857 | /* We need to guess program type based on its declared context type. |
858 | * This guess can't be perfect as many different program types might |
859 | * share the same context type. So we can only hope to reasonably |
860 | * well guess this and get lucky. |
861 | * |
862 | * Just in case, we support both UAPI-side type names and |
863 | * kernel-internal names. |
864 | */ |
865 | static struct { |
866 | const char *uapi_name; |
867 | const char *kern_name; |
868 | enum bpf_prog_type prog_type; |
869 | enum bpf_attach_type attach_type; |
870 | } ctx_map[] = { |
871 | /* __sk_buff is most ambiguous, we assume TC program */ |
872 | { "__sk_buff" , "sk_buff" , BPF_PROG_TYPE_SCHED_CLS }, |
873 | { "bpf_sock" , "sock" , BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND }, |
874 | { "bpf_sock_addr" , "bpf_sock_addr_kern" , BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND }, |
875 | { "bpf_sock_ops" , "bpf_sock_ops_kern" , BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS }, |
876 | { "sk_msg_md" , "sk_msg" , BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT }, |
877 | { "bpf_cgroup_dev_ctx" , "bpf_cgroup_dev_ctx" , BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE }, |
878 | { "bpf_sysctl" , "bpf_sysctl_kern" , BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL }, |
879 | { "bpf_sockopt" , "bpf_sockopt_kern" , BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT }, |
880 | { "sk_reuseport_md" , "sk_reuseport_kern" , BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE }, |
881 | { "bpf_sk_lookup" , "bpf_sk_lookup_kern" , BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP }, |
882 | { "xdp_md" , "xdp_buff" , BPF_PROG_TYPE_XDP, BPF_XDP }, |
883 | /* tracing types with no expected attach type */ |
884 | { "bpf_user_pt_regs_t" , "pt_regs" , BPF_PROG_TYPE_KPROBE }, |
885 | { "bpf_perf_event_data" , "bpf_perf_event_data_kern" , BPF_PROG_TYPE_PERF_EVENT }, |
886 | /* raw_tp programs use u64[] from kernel side, we don't want |
887 | * to match on that, probably; so NULL for kern-side type |
888 | */ |
889 | { "bpf_raw_tracepoint_args" , NULL, BPF_PROG_TYPE_RAW_TRACEPOINT }, |
890 | }; |
891 | int i; |
892 | |
893 | if (!ctx_name) |
894 | return -EINVAL; |
895 | |
896 | for (i = 0; i < ARRAY_SIZE(ctx_map); i++) { |
897 | if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 || |
898 | (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) { |
899 | *prog_type = ctx_map[i].prog_type; |
900 | *attach_type = ctx_map[i].attach_type; |
901 | return 0; |
902 | } |
903 | } |
904 | |
905 | return -ESRCH; |
906 | } |
907 | |
908 | static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename) |
909 | { |
910 | struct bpf_map *map; |
911 | |
912 | bpf_object__for_each_map(map, obj) { |
913 | /* disable pinning */ |
914 | bpf_map__set_pin_path(map, NULL); |
915 | |
916 | /* fix up map size, if necessary */ |
917 | switch (bpf_map__type(map)) { |
918 | case BPF_MAP_TYPE_SK_STORAGE: |
919 | case BPF_MAP_TYPE_TASK_STORAGE: |
920 | case BPF_MAP_TYPE_INODE_STORAGE: |
921 | case BPF_MAP_TYPE_CGROUP_STORAGE: |
922 | break; |
923 | default: |
924 | if (bpf_map__max_entries(map) == 0) |
925 | bpf_map__set_max_entries(map, 1); |
926 | } |
927 | } |
928 | |
929 | /* SEC(freplace) programs can't be loaded with veristat as is, |
930 | * but we can try guessing their target program's expected type by |
931 | * looking at the type of program's first argument and substituting |
932 | * corresponding program type |
933 | */ |
934 | if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) { |
935 | const struct btf *btf = bpf_object__btf(obj); |
936 | const char *prog_name = bpf_program__name(prog); |
937 | enum bpf_prog_type prog_type; |
938 | enum bpf_attach_type attach_type; |
939 | const struct btf_type *t; |
940 | const char *ctx_name; |
941 | int id; |
942 | |
943 | if (!btf) |
944 | goto skip_freplace_fixup; |
945 | |
946 | id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC); |
947 | t = btf__type_by_id(btf, id); |
948 | t = btf__type_by_id(btf, t->type); |
949 | if (!btf_is_func_proto(t) || btf_vlen(t) != 1) |
950 | goto skip_freplace_fixup; |
951 | |
952 | /* context argument is a pointer to a struct/typedef */ |
953 | t = btf__type_by_id(btf, btf_params(t)[0].type); |
954 | while (t && btf_is_mod(t)) |
955 | t = btf__type_by_id(btf, t->type); |
956 | if (!t || !btf_is_ptr(t)) |
957 | goto skip_freplace_fixup; |
958 | t = btf__type_by_id(btf, t->type); |
959 | while (t && btf_is_mod(t)) |
960 | t = btf__type_by_id(btf, t->type); |
961 | if (!t) |
962 | goto skip_freplace_fixup; |
963 | |
964 | ctx_name = btf__name_by_offset(btf, t->name_off); |
965 | |
966 | if (guess_prog_type_by_ctx_name(ctx_name, prog_type: &prog_type, attach_type: &attach_type) == 0) { |
967 | bpf_program__set_type(prog, prog_type); |
968 | bpf_program__set_expected_attach_type(prog, attach_type); |
969 | |
970 | if (!env.quiet) { |
971 | printf("Using guessed program type '%s' for %s/%s...\n" , |
972 | libbpf_bpf_prog_type_str(prog_type), |
973 | filename, prog_name); |
974 | } |
975 | } else { |
976 | if (!env.quiet) { |
977 | printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n" , |
978 | ctx_name, filename, prog_name); |
979 | } |
980 | } |
981 | } |
982 | skip_freplace_fixup: |
983 | return; |
984 | } |
985 | |
986 | static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) |
987 | { |
988 | const char *prog_name = bpf_program__name(prog); |
989 | const char *base_filename = basename(filename); |
990 | char *buf; |
991 | int buf_sz, log_level; |
992 | struct verif_stats *stats; |
993 | int err = 0; |
994 | void *tmp; |
995 | |
996 | if (!should_process_file_prog(base_filename, bpf_program__name(prog))) { |
997 | env.progs_skipped++; |
998 | return 0; |
999 | } |
1000 | |
1001 | tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats)); |
1002 | if (!tmp) |
1003 | return -ENOMEM; |
1004 | env.prog_stats = tmp; |
1005 | stats = &env.prog_stats[env.prog_stat_cnt++]; |
1006 | memset(stats, 0, sizeof(*stats)); |
1007 | |
1008 | if (env.verbose) { |
1009 | buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024; |
1010 | buf = malloc(buf_sz); |
1011 | if (!buf) |
1012 | return -ENOMEM; |
1013 | /* ensure we always request stats */ |
1014 | log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0); |
1015 | } else { |
1016 | buf = verif_log_buf; |
1017 | buf_sz = sizeof(verif_log_buf); |
1018 | /* request only verifier stats */ |
1019 | log_level = 4 | (env.log_fixed ? 8 : 0); |
1020 | } |
1021 | verif_log_buf[0] = '\0'; |
1022 | |
1023 | bpf_program__set_log_buf(prog, buf, buf_sz); |
1024 | bpf_program__set_log_level(prog, log_level); |
1025 | |
1026 | /* increase chances of successful BPF object loading */ |
1027 | fixup_obj(obj, prog, filename: base_filename); |
1028 | |
1029 | if (env.force_checkpoints) |
1030 | bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); |
1031 | if (env.force_reg_invariants) |
1032 | bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); |
1033 | |
1034 | err = bpf_object__load(obj); |
1035 | env.progs_processed++; |
1036 | |
1037 | stats->file_name = strdup(base_filename); |
1038 | stats->prog_name = strdup(bpf_program__name(prog)); |
1039 | stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */ |
1040 | parse_verif_log(buf, buf_sz, stats); |
1041 | |
1042 | if (env.verbose) { |
1043 | printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n" , |
1044 | filename, prog_name, stats->stats[DURATION], |
1045 | err ? "failure" : "success" , buf); |
1046 | } |
1047 | |
1048 | if (verif_log_buf != buf) |
1049 | free(buf); |
1050 | |
1051 | return 0; |
1052 | }; |
1053 | |
1054 | static int process_obj(const char *filename) |
1055 | { |
1056 | struct bpf_object *obj = NULL, *tobj; |
1057 | struct bpf_program *prog, *tprog, *lprog; |
1058 | libbpf_print_fn_t old_libbpf_print_fn; |
1059 | LIBBPF_OPTS(bpf_object_open_opts, opts); |
1060 | int err = 0, prog_cnt = 0; |
1061 | |
1062 | if (!should_process_file_prog(basename(filename), NULL)) { |
1063 | if (env.verbose) |
1064 | printf("Skipping '%s' due to filters...\n" , filename); |
1065 | env.files_skipped++; |
1066 | return 0; |
1067 | } |
1068 | if (!is_bpf_obj_file(filename)) { |
1069 | if (env.verbose) |
1070 | printf("Skipping '%s' as it's not a BPF object file...\n" , filename); |
1071 | env.files_skipped++; |
1072 | return 0; |
1073 | } |
1074 | |
1075 | if (!env.quiet && env.out_fmt == RESFMT_TABLE) |
1076 | printf("Processing '%s'...\n" , basename(filename)); |
1077 | |
1078 | old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn); |
1079 | obj = bpf_object__open_file(filename, &opts); |
1080 | if (!obj) { |
1081 | /* if libbpf can't open BPF object file, it could be because |
1082 | * that BPF object file is incomplete and has to be statically |
1083 | * linked into a final BPF object file; instead of bailing |
1084 | * out, report it into stderr, mark it as skipped, and |
1085 | * proceed |
1086 | */ |
1087 | fprintf(stderr, "Failed to open '%s': %d\n" , filename, -errno); |
1088 | env.files_skipped++; |
1089 | err = 0; |
1090 | goto cleanup; |
1091 | } |
1092 | |
1093 | env.files_processed++; |
1094 | |
1095 | bpf_object__for_each_program(prog, obj) { |
1096 | prog_cnt++; |
1097 | } |
1098 | |
1099 | if (prog_cnt == 1) { |
1100 | prog = bpf_object__next_program(obj, NULL); |
1101 | bpf_program__set_autoload(prog, true); |
1102 | process_prog(filename, obj, prog); |
1103 | goto cleanup; |
1104 | } |
1105 | |
1106 | bpf_object__for_each_program(prog, obj) { |
1107 | const char *prog_name = bpf_program__name(prog); |
1108 | |
1109 | tobj = bpf_object__open_file(filename, &opts); |
1110 | if (!tobj) { |
1111 | err = -errno; |
1112 | fprintf(stderr, "Failed to open '%s': %d\n" , filename, err); |
1113 | goto cleanup; |
1114 | } |
1115 | |
1116 | lprog = NULL; |
1117 | bpf_object__for_each_program(tprog, tobj) { |
1118 | const char *tprog_name = bpf_program__name(tprog); |
1119 | |
1120 | if (strcmp(prog_name, tprog_name) == 0) { |
1121 | bpf_program__set_autoload(tprog, true); |
1122 | lprog = tprog; |
1123 | } else { |
1124 | bpf_program__set_autoload(tprog, false); |
1125 | } |
1126 | } |
1127 | |
1128 | process_prog(filename, obj: tobj, prog: lprog); |
1129 | bpf_object__close(tobj); |
1130 | } |
1131 | |
1132 | cleanup: |
1133 | bpf_object__close(obj); |
1134 | libbpf_set_print(old_libbpf_print_fn); |
1135 | return err; |
1136 | } |
1137 | |
1138 | static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, |
1139 | enum stat_id id, bool asc, bool abs) |
1140 | { |
1141 | int cmp = 0; |
1142 | |
1143 | switch (id) { |
1144 | case FILE_NAME: |
1145 | cmp = strcmp(s1->file_name, s2->file_name); |
1146 | break; |
1147 | case PROG_NAME: |
1148 | cmp = strcmp(s1->prog_name, s2->prog_name); |
1149 | break; |
1150 | case VERDICT: |
1151 | case DURATION: |
1152 | case TOTAL_INSNS: |
1153 | case TOTAL_STATES: |
1154 | case PEAK_STATES: |
1155 | case MAX_STATES_PER_INSN: |
1156 | case MARK_READ_MAX_LEN: { |
1157 | long v1 = s1->stats[id]; |
1158 | long v2 = s2->stats[id]; |
1159 | |
1160 | if (abs) { |
1161 | v1 = v1 < 0 ? -v1 : v1; |
1162 | v2 = v2 < 0 ? -v2 : v2; |
1163 | } |
1164 | |
1165 | if (v1 != v2) |
1166 | cmp = v1 < v2 ? -1 : 1; |
1167 | break; |
1168 | } |
1169 | default: |
1170 | fprintf(stderr, "Unrecognized stat #%d\n" , id); |
1171 | exit(1); |
1172 | } |
1173 | |
1174 | return asc ? cmp : -cmp; |
1175 | } |
1176 | |
1177 | static int cmp_prog_stats(const void *v1, const void *v2) |
1178 | { |
1179 | const struct verif_stats *s1 = v1, *s2 = v2; |
1180 | int i, cmp; |
1181 | |
1182 | for (i = 0; i < env.sort_spec.spec_cnt; i++) { |
1183 | cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], |
1184 | env.sort_spec.asc[i], env.sort_spec.abs[i]); |
1185 | if (cmp != 0) |
1186 | return cmp; |
1187 | } |
1188 | |
1189 | /* always disambiguate with file+prog, which are unique */ |
1190 | cmp = strcmp(s1->file_name, s2->file_name); |
1191 | if (cmp != 0) |
1192 | return cmp; |
1193 | return strcmp(s1->prog_name, s2->prog_name); |
1194 | } |
1195 | |
1196 | static void fetch_join_stat_value(const struct verif_stats_join *s, |
1197 | enum stat_id id, enum stat_variant var, |
1198 | const char **str_val, |
1199 | double *num_val) |
1200 | { |
1201 | long v1, v2; |
1202 | |
1203 | if (id == FILE_NAME) { |
1204 | *str_val = s->file_name; |
1205 | return; |
1206 | } |
1207 | if (id == PROG_NAME) { |
1208 | *str_val = s->prog_name; |
1209 | return; |
1210 | } |
1211 | |
1212 | v1 = s->stats_a ? s->stats_a->stats[id] : 0; |
1213 | v2 = s->stats_b ? s->stats_b->stats[id] : 0; |
1214 | |
1215 | switch (var) { |
1216 | case VARIANT_A: |
1217 | if (!s->stats_a) |
1218 | *num_val = -DBL_MAX; |
1219 | else |
1220 | *num_val = s->stats_a->stats[id]; |
1221 | return; |
1222 | case VARIANT_B: |
1223 | if (!s->stats_b) |
1224 | *num_val = -DBL_MAX; |
1225 | else |
1226 | *num_val = s->stats_b->stats[id]; |
1227 | return; |
1228 | case VARIANT_DIFF: |
1229 | if (!s->stats_a || !s->stats_b) |
1230 | *num_val = -DBL_MAX; |
1231 | else if (id == VERDICT) |
1232 | *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */; |
1233 | else |
1234 | *num_val = (double)(v2 - v1); |
1235 | return; |
1236 | case VARIANT_PCT: |
1237 | if (!s->stats_a || !s->stats_b) { |
1238 | *num_val = -DBL_MAX; |
1239 | } else if (v1 == 0) { |
1240 | if (v1 == v2) |
1241 | *num_val = 0.0; |
1242 | else |
1243 | *num_val = v2 < v1 ? -100.0 : 100.0; |
1244 | } else { |
1245 | *num_val = (v2 - v1) * 100.0 / v1; |
1246 | } |
1247 | return; |
1248 | } |
1249 | } |
1250 | |
1251 | static int cmp_join_stat(const struct verif_stats_join *s1, |
1252 | const struct verif_stats_join *s2, |
1253 | enum stat_id id, enum stat_variant var, |
1254 | bool asc, bool abs) |
1255 | { |
1256 | const char *str1 = NULL, *str2 = NULL; |
1257 | double v1 = 0.0, v2 = 0.0; |
1258 | int cmp = 0; |
1259 | |
1260 | fetch_join_stat_value(s: s1, id, var, str_val: &str1, num_val: &v1); |
1261 | fetch_join_stat_value(s: s2, id, var, str_val: &str2, num_val: &v2); |
1262 | |
1263 | if (abs) { |
1264 | v1 = fabs(v1); |
1265 | v2 = fabs(v2); |
1266 | } |
1267 | |
1268 | if (str1) |
1269 | cmp = strcmp(str1, str2); |
1270 | else if (v1 != v2) |
1271 | cmp = v1 < v2 ? -1 : 1; |
1272 | |
1273 | return asc ? cmp : -cmp; |
1274 | } |
1275 | |
1276 | static int cmp_join_stats(const void *v1, const void *v2) |
1277 | { |
1278 | const struct verif_stats_join *s1 = v1, *s2 = v2; |
1279 | int i, cmp; |
1280 | |
1281 | for (i = 0; i < env.sort_spec.spec_cnt; i++) { |
1282 | cmp = cmp_join_stat(s1, s2, |
1283 | env.sort_spec.ids[i], |
1284 | env.sort_spec.variants[i], |
1285 | env.sort_spec.asc[i], |
1286 | env.sort_spec.abs[i]); |
1287 | if (cmp != 0) |
1288 | return cmp; |
1289 | } |
1290 | |
1291 | /* always disambiguate with file+prog, which are unique */ |
1292 | cmp = strcmp(s1->file_name, s2->file_name); |
1293 | if (cmp != 0) |
1294 | return cmp; |
1295 | return strcmp(s1->prog_name, s2->prog_name); |
1296 | } |
1297 | |
/* character used to draw the underline rows of table output */
#define HEADER_CHAR '-'
1299 | #define COLUMN_SEP " " |
1300 | |
1301 | static void (void) |
1302 | { |
1303 | int i, j, len; |
1304 | |
1305 | for (i = 0; i < env.output_spec.spec_cnt; i++) { |
1306 | len = env.output_spec.lens[i]; |
1307 | |
1308 | printf("%s" , i == 0 ? "" : COLUMN_SEP); |
1309 | for (j = 0; j < len; j++) |
1310 | printf("%c" , HEADER_CHAR); |
1311 | } |
1312 | printf("\n" ); |
1313 | } |
1314 | |
1315 | static void (enum resfmt fmt) |
1316 | { |
1317 | const char *fmt_str; |
1318 | int i, len; |
1319 | |
1320 | for (i = 0; i < env.output_spec.spec_cnt; i++) { |
1321 | int id = env.output_spec.ids[i]; |
1322 | int *max_len = &env.output_spec.lens[i]; |
1323 | |
1324 | switch (fmt) { |
1325 | case RESFMT_TABLE_CALCLEN: |
1326 | len = snprintf(NULL, 0, "%s" , stat_defs[id].header); |
1327 | if (len > *max_len) |
1328 | *max_len = len; |
1329 | break; |
1330 | case RESFMT_TABLE: |
1331 | fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s" ; |
1332 | printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); |
1333 | if (i == env.output_spec.spec_cnt - 1) |
1334 | printf("\n" ); |
1335 | break; |
1336 | case RESFMT_CSV: |
1337 | printf("%s%s" , i == 0 ? "" : "," , stat_defs[id].names[0]); |
1338 | if (i == env.output_spec.spec_cnt - 1) |
1339 | printf("\n" ); |
1340 | break; |
1341 | } |
1342 | } |
1343 | |
1344 | if (fmt == RESFMT_TABLE) |
1345 | output_header_underlines(); |
1346 | } |
1347 | |
1348 | static void prepare_value(const struct verif_stats *s, enum stat_id id, |
1349 | const char **str, long *val) |
1350 | { |
1351 | switch (id) { |
1352 | case FILE_NAME: |
1353 | *str = s ? s->file_name : "N/A" ; |
1354 | break; |
1355 | case PROG_NAME: |
1356 | *str = s ? s->prog_name : "N/A" ; |
1357 | break; |
1358 | case VERDICT: |
1359 | if (!s) |
1360 | *str = "N/A" ; |
1361 | else |
1362 | *str = s->stats[VERDICT] ? "success" : "failure" ; |
1363 | break; |
1364 | case DURATION: |
1365 | case TOTAL_INSNS: |
1366 | case TOTAL_STATES: |
1367 | case PEAK_STATES: |
1368 | case MAX_STATES_PER_INSN: |
1369 | case MARK_READ_MAX_LEN: |
1370 | *val = s ? s->stats[id] : 0; |
1371 | break; |
1372 | default: |
1373 | fprintf(stderr, "Unrecognized stat #%d\n" , id); |
1374 | exit(1); |
1375 | } |
1376 | } |
1377 | |
1378 | static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last) |
1379 | { |
1380 | int i; |
1381 | |
1382 | for (i = 0; i < env.output_spec.spec_cnt; i++) { |
1383 | int id = env.output_spec.ids[i]; |
1384 | int *max_len = &env.output_spec.lens[i], len; |
1385 | const char *str = NULL; |
1386 | long val = 0; |
1387 | |
1388 | prepare_value(s, id, str: &str, val: &val); |
1389 | |
1390 | switch (fmt) { |
1391 | case RESFMT_TABLE_CALCLEN: |
1392 | if (str) |
1393 | len = snprintf(NULL, 0, "%s" , str); |
1394 | else |
1395 | len = snprintf(NULL, 0, "%ld" , val); |
1396 | if (len > *max_len) |
1397 | *max_len = len; |
1398 | break; |
1399 | case RESFMT_TABLE: |
1400 | if (str) |
1401 | printf("%s%-*s" , i == 0 ? "" : COLUMN_SEP, *max_len, str); |
1402 | else |
1403 | printf("%s%*ld" , i == 0 ? "" : COLUMN_SEP, *max_len, val); |
1404 | if (i == env.output_spec.spec_cnt - 1) |
1405 | printf("\n" ); |
1406 | break; |
1407 | case RESFMT_CSV: |
1408 | if (str) |
1409 | printf("%s%s" , i == 0 ? "" : "," , str); |
1410 | else |
1411 | printf("%s%ld" , i == 0 ? "" : "," , val); |
1412 | if (i == env.output_spec.spec_cnt - 1) |
1413 | printf("\n" ); |
1414 | break; |
1415 | } |
1416 | } |
1417 | |
1418 | if (last && fmt == RESFMT_TABLE) { |
1419 | output_header_underlines(); |
1420 | printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n" , |
1421 | env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped); |
1422 | } |
1423 | } |
1424 | |
1425 | static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st) |
1426 | { |
1427 | switch (id) { |
1428 | case FILE_NAME: |
1429 | st->file_name = strdup(str); |
1430 | if (!st->file_name) |
1431 | return -ENOMEM; |
1432 | break; |
1433 | case PROG_NAME: |
1434 | st->prog_name = strdup(str); |
1435 | if (!st->prog_name) |
1436 | return -ENOMEM; |
1437 | break; |
1438 | case VERDICT: |
1439 | if (strcmp(str, "success" ) == 0) { |
1440 | st->stats[VERDICT] = true; |
1441 | } else if (strcmp(str, "failure" ) == 0) { |
1442 | st->stats[VERDICT] = false; |
1443 | } else { |
1444 | fprintf(stderr, "Unrecognized verification verdict '%s'\n" , str); |
1445 | return -EINVAL; |
1446 | } |
1447 | break; |
1448 | case DURATION: |
1449 | case TOTAL_INSNS: |
1450 | case TOTAL_STATES: |
1451 | case PEAK_STATES: |
1452 | case MAX_STATES_PER_INSN: |
1453 | case MARK_READ_MAX_LEN: { |
1454 | long val; |
1455 | int err, n; |
1456 | |
1457 | if (sscanf(str, "%ld %n" , &val, &n) != 1 || n != strlen(str)) { |
1458 | err = -errno; |
1459 | fprintf(stderr, "Failed to parse '%s' as integer\n" , str); |
1460 | return err; |
1461 | } |
1462 | |
1463 | st->stats[id] = val; |
1464 | break; |
1465 | } |
1466 | default: |
1467 | fprintf(stderr, "Unrecognized stat #%d\n" , id); |
1468 | return -EINVAL; |
1469 | } |
1470 | return 0; |
1471 | } |
1472 | |
1473 | static int parse_stats_csv(const char *filename, struct stat_specs *specs, |
1474 | struct verif_stats **statsp, int *stat_cntp) |
1475 | { |
1476 | char line[4096]; |
1477 | FILE *f; |
1478 | int err = 0; |
1479 | bool header = true; |
1480 | |
1481 | f = fopen(filename, "r" ); |
1482 | if (!f) { |
1483 | err = -errno; |
1484 | fprintf(stderr, "Failed to open '%s': %d\n" , filename, err); |
1485 | return err; |
1486 | } |
1487 | |
1488 | *stat_cntp = 0; |
1489 | |
1490 | while (fgets(line, sizeof(line), f)) { |
1491 | char *input = line, *state = NULL, *next; |
1492 | struct verif_stats *st = NULL; |
1493 | int col = 0; |
1494 | |
1495 | if (!header) { |
1496 | void *tmp; |
1497 | |
1498 | tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp)); |
1499 | if (!tmp) { |
1500 | err = -ENOMEM; |
1501 | goto cleanup; |
1502 | } |
1503 | *statsp = tmp; |
1504 | |
1505 | st = &(*statsp)[*stat_cntp]; |
1506 | memset(st, 0, sizeof(*st)); |
1507 | |
1508 | *stat_cntp += 1; |
1509 | } |
1510 | |
1511 | while ((next = strtok_r(state ? NULL : input, ",\n" , &state))) { |
1512 | if (header) { |
1513 | /* for the first line, set up spec stats */ |
1514 | err = parse_stat(stat_name: next, specs); |
1515 | if (err) |
1516 | goto cleanup; |
1517 | continue; |
1518 | } |
1519 | |
1520 | /* for all other lines, parse values based on spec */ |
1521 | if (col >= specs->spec_cnt) { |
1522 | fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n" , |
1523 | col, *stat_cntp, filename); |
1524 | err = -EINVAL; |
1525 | goto cleanup; |
1526 | } |
1527 | err = parse_stat_value(str: next, id: specs->ids[col], st); |
1528 | if (err) |
1529 | goto cleanup; |
1530 | col++; |
1531 | } |
1532 | |
1533 | if (header) { |
1534 | header = false; |
1535 | continue; |
1536 | } |
1537 | |
1538 | if (col < specs->spec_cnt) { |
1539 | fprintf(stderr, "Not enough columns in row #%d in '%s'\n" , |
1540 | *stat_cntp, filename); |
1541 | err = -EINVAL; |
1542 | goto cleanup; |
1543 | } |
1544 | |
1545 | if (!st->file_name || !st->prog_name) { |
1546 | fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n" , |
1547 | *stat_cntp, filename); |
1548 | err = -EINVAL; |
1549 | goto cleanup; |
1550 | } |
1551 | |
1552 | /* in comparison mode we can only check filters after we |
1553 | * parsed entire line; if row should be ignored we pretend we |
1554 | * never parsed it |
1555 | */ |
1556 | if (!should_process_file_prog(st->file_name, st->prog_name)) { |
1557 | free(st->file_name); |
1558 | free(st->prog_name); |
1559 | *stat_cntp -= 1; |
1560 | } |
1561 | } |
1562 | |
1563 | if (!feof(f)) { |
1564 | err = -errno; |
1565 | fprintf(stderr, "Failed I/O for '%s': %d\n" , filename, err); |
1566 | } |
1567 | |
1568 | cleanup: |
1569 | fclose(f); |
1570 | return err; |
1571 | } |
1572 | |
1573 | /* empty/zero stats for mismatched rows */ |
1574 | static const struct verif_stats fallback_stats = { .file_name = "" , .prog_name = "" }; |
1575 | |
1576 | static bool is_key_stat(enum stat_id id) |
1577 | { |
1578 | return id == FILE_NAME || id == PROG_NAME; |
1579 | } |
1580 | |
1581 | static void (void) |
1582 | { |
1583 | int i, j, k; |
1584 | |
1585 | for (i = 0; i < env.output_spec.spec_cnt; i++) { |
1586 | int id = env.output_spec.ids[i]; |
1587 | int max_j = is_key_stat(id) ? 1 : 3; |
1588 | |
1589 | for (j = 0; j < max_j; j++) { |
1590 | int len = env.output_spec.lens[3 * i + j]; |
1591 | |
1592 | printf("%s" , i + j == 0 ? "" : COLUMN_SEP); |
1593 | |
1594 | for (k = 0; k < len; k++) |
1595 | printf("%c" , HEADER_CHAR); |
1596 | } |
1597 | } |
1598 | printf("\n" ); |
1599 | } |
1600 | |
1601 | static void (enum resfmt fmt) |
1602 | { |
1603 | static const char *table_sfxs[3] = {" (A)" , " (B)" , " (DIFF)" }; |
1604 | static const char *name_sfxs[3] = {"_base" , "_comp" , "_diff" }; |
1605 | int i, j, len; |
1606 | |
1607 | for (i = 0; i < env.output_spec.spec_cnt; i++) { |
1608 | int id = env.output_spec.ids[i]; |
1609 | /* key stats don't have A/B/DIFF columns, they are common for both data sets */ |
1610 | int max_j = is_key_stat(id) ? 1 : 3; |
1611 | |
1612 | for (j = 0; j < max_j; j++) { |
1613 | int *max_len = &env.output_spec.lens[3 * i + j]; |
1614 | bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1); |
1615 | const char *sfx; |
1616 | |
1617 | switch (fmt) { |
1618 | case RESFMT_TABLE_CALCLEN: |
1619 | sfx = is_key_stat(id) ? "" : table_sfxs[j]; |
1620 | len = snprintf(NULL, 0, "%s%s" , stat_defs[id].header, sfx); |
1621 | if (len > *max_len) |
1622 | *max_len = len; |
1623 | break; |
1624 | case RESFMT_TABLE: |
1625 | sfx = is_key_stat(id) ? "" : table_sfxs[j]; |
1626 | printf("%s%-*s%s" , i + j == 0 ? "" : COLUMN_SEP, |
1627 | *max_len - (int)strlen(sfx), stat_defs[id].header, sfx); |
1628 | if (last) |
1629 | printf("\n" ); |
1630 | break; |
1631 | case RESFMT_CSV: |
1632 | sfx = is_key_stat(id) ? "" : name_sfxs[j]; |
1633 | printf("%s%s%s" , i + j == 0 ? "" : "," , stat_defs[id].names[0], sfx); |
1634 | if (last) |
1635 | printf("\n" ); |
1636 | break; |
1637 | } |
1638 | } |
1639 | } |
1640 | |
1641 | if (fmt == RESFMT_TABLE) |
1642 | output_comp_header_underlines(); |
1643 | } |
1644 | |
1645 | static void output_comp_stats(const struct verif_stats_join *join_stats, |
1646 | enum resfmt fmt, bool last) |
1647 | { |
1648 | const struct verif_stats *base = join_stats->stats_a; |
1649 | const struct verif_stats *comp = join_stats->stats_b; |
1650 | char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {}; |
1651 | int i; |
1652 | |
1653 | for (i = 0; i < env.output_spec.spec_cnt; i++) { |
1654 | int id = env.output_spec.ids[i], len; |
1655 | int *max_len_base = &env.output_spec.lens[3 * i + 0]; |
1656 | int *max_len_comp = &env.output_spec.lens[3 * i + 1]; |
1657 | int *max_len_diff = &env.output_spec.lens[3 * i + 2]; |
1658 | const char *base_str = NULL, *comp_str = NULL; |
1659 | long base_val = 0, comp_val = 0, diff_val = 0; |
1660 | |
1661 | prepare_value(s: base, id, str: &base_str, val: &base_val); |
1662 | prepare_value(s: comp, id, str: &comp_str, val: &comp_val); |
1663 | |
1664 | /* normalize all the outputs to be in string buffers for simplicity */ |
1665 | if (is_key_stat(id)) { |
1666 | /* key stats (file and program name) are always strings */ |
1667 | if (base) |
1668 | snprintf(base_buf, sizeof(base_buf), "%s" , base_str); |
1669 | else |
1670 | snprintf(base_buf, sizeof(base_buf), "%s" , comp_str); |
1671 | } else if (base_str) { |
1672 | snprintf(base_buf, sizeof(base_buf), "%s" , base_str); |
1673 | snprintf(comp_buf, sizeof(comp_buf), "%s" , comp_str); |
1674 | if (!base || !comp) |
1675 | snprintf(diff_buf, sizeof(diff_buf), "%s" , "N/A" ); |
1676 | else if (strcmp(base_str, comp_str) == 0) |
1677 | snprintf(diff_buf, sizeof(diff_buf), "%s" , "MATCH" ); |
1678 | else |
1679 | snprintf(diff_buf, sizeof(diff_buf), "%s" , "MISMATCH" ); |
1680 | } else { |
1681 | double p = 0.0; |
1682 | |
1683 | if (base) |
1684 | snprintf(base_buf, sizeof(base_buf), "%ld" , base_val); |
1685 | else |
1686 | snprintf(base_buf, sizeof(base_buf), "%s" , "N/A" ); |
1687 | if (comp) |
1688 | snprintf(comp_buf, sizeof(comp_buf), "%ld" , comp_val); |
1689 | else |
1690 | snprintf(comp_buf, sizeof(comp_buf), "%s" , "N/A" ); |
1691 | |
1692 | diff_val = comp_val - base_val; |
1693 | if (!base || !comp) { |
1694 | snprintf(diff_buf, sizeof(diff_buf), "%s" , "N/A" ); |
1695 | } else { |
1696 | if (base_val == 0) { |
1697 | if (comp_val == base_val) |
1698 | p = 0.0; /* avoid +0 (+100%) case */ |
1699 | else |
1700 | p = comp_val < base_val ? -100.0 : 100.0; |
1701 | } else { |
1702 | p = diff_val * 100.0 / base_val; |
1703 | } |
1704 | snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)" , diff_val, p); |
1705 | } |
1706 | } |
1707 | |
1708 | switch (fmt) { |
1709 | case RESFMT_TABLE_CALCLEN: |
1710 | len = strlen(base_buf); |
1711 | if (len > *max_len_base) |
1712 | *max_len_base = len; |
1713 | if (!is_key_stat(id)) { |
1714 | len = strlen(comp_buf); |
1715 | if (len > *max_len_comp) |
1716 | *max_len_comp = len; |
1717 | len = strlen(diff_buf); |
1718 | if (len > *max_len_diff) |
1719 | *max_len_diff = len; |
1720 | } |
1721 | break; |
1722 | case RESFMT_TABLE: { |
1723 | /* string outputs are left-aligned, number outputs are right-aligned */ |
1724 | const char *fmt = base_str ? "%s%-*s" : "%s%*s" ; |
1725 | |
1726 | printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf); |
1727 | if (!is_key_stat(id)) { |
1728 | printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf); |
1729 | printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf); |
1730 | } |
1731 | if (i == env.output_spec.spec_cnt - 1) |
1732 | printf("\n" ); |
1733 | break; |
1734 | } |
1735 | case RESFMT_CSV: |
1736 | printf("%s%s" , i == 0 ? "" : "," , base_buf); |
1737 | if (!is_key_stat(id)) { |
1738 | printf("%s%s" , i == 0 ? "" : "," , comp_buf); |
1739 | printf("%s%s" , i == 0 ? "" : "," , diff_buf); |
1740 | } |
1741 | if (i == env.output_spec.spec_cnt - 1) |
1742 | printf("\n" ); |
1743 | break; |
1744 | } |
1745 | } |
1746 | |
1747 | if (last && fmt == RESFMT_TABLE) |
1748 | output_comp_header_underlines(); |
1749 | } |
1750 | |
1751 | static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp) |
1752 | { |
1753 | int r; |
1754 | |
1755 | r = strcmp(base->file_name, comp->file_name); |
1756 | if (r != 0) |
1757 | return r; |
1758 | return strcmp(base->prog_name, comp->prog_name); |
1759 | } |
1760 | |
1761 | static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats) |
1762 | { |
1763 | static const double eps = 1e-9; |
1764 | const char *str = NULL; |
1765 | double value = 0.0; |
1766 | |
1767 | fetch_join_stat_value(s: stats, id: f->stat_id, var: f->stat_var, str_val: &str, num_val: &value); |
1768 | |
1769 | if (f->abs) |
1770 | value = fabs(value); |
1771 | |
1772 | switch (f->op) { |
1773 | case OP_EQ: return value > f->value - eps && value < f->value + eps; |
1774 | case OP_NEQ: return value < f->value - eps || value > f->value + eps; |
1775 | case OP_LT: return value < f->value - eps; |
1776 | case OP_LE: return value <= f->value + eps; |
1777 | case OP_GT: return value > f->value + eps; |
1778 | case OP_GE: return value >= f->value - eps; |
1779 | } |
1780 | |
1781 | fprintf(stderr, "BUG: unknown filter op %d!\n" , f->op); |
1782 | return false; |
1783 | } |
1784 | |
1785 | static bool should_output_join_stats(const struct verif_stats_join *stats) |
1786 | { |
1787 | struct filter *f; |
1788 | int i, allow_cnt = 0; |
1789 | |
1790 | for (i = 0; i < env.deny_filter_cnt; i++) { |
1791 | f = &env.deny_filters[i]; |
1792 | if (f->kind != FILTER_STAT) |
1793 | continue; |
1794 | |
1795 | if (is_join_stat_filter_matched(f, stats)) |
1796 | return false; |
1797 | } |
1798 | |
1799 | for (i = 0; i < env.allow_filter_cnt; i++) { |
1800 | f = &env.allow_filters[i]; |
1801 | if (f->kind != FILTER_STAT) |
1802 | continue; |
1803 | allow_cnt++; |
1804 | |
1805 | if (is_join_stat_filter_matched(f, stats)) |
1806 | return true; |
1807 | } |
1808 | |
1809 | /* if there are no stat allowed filters, pass everything through */ |
1810 | return allow_cnt == 0; |
1811 | } |
1812 | |
1813 | static int handle_comparison_mode(void) |
1814 | { |
1815 | struct stat_specs base_specs = {}, comp_specs = {}; |
1816 | struct stat_specs tmp_sort_spec; |
1817 | enum resfmt cur_fmt; |
1818 | int err, i, j, last_idx, cnt; |
1819 | |
1820 | if (env.filename_cnt != 2) { |
1821 | fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n" ); |
1822 | argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat" ); |
1823 | return -EINVAL; |
1824 | } |
1825 | |
1826 | err = parse_stats_csv(filename: env.filenames[0], specs: &base_specs, |
1827 | statsp: &env.baseline_stats, stat_cntp: &env.baseline_stat_cnt); |
1828 | if (err) { |
1829 | fprintf(stderr, "Failed to parse stats from '%s': %d\n" , env.filenames[0], err); |
1830 | return err; |
1831 | } |
1832 | err = parse_stats_csv(filename: env.filenames[1], specs: &comp_specs, |
1833 | statsp: &env.prog_stats, stat_cntp: &env.prog_stat_cnt); |
1834 | if (err) { |
1835 | fprintf(stderr, "Failed to parse stats from '%s': %d\n" , env.filenames[1], err); |
1836 | return err; |
1837 | } |
1838 | |
1839 | /* To keep it simple we validate that the set and order of stats in |
1840 | * both CSVs are exactly the same. This can be lifted with a bit more |
1841 | * pre-processing later. |
1842 | */ |
1843 | if (base_specs.spec_cnt != comp_specs.spec_cnt) { |
1844 | fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n" , |
1845 | env.filenames[0], env.filenames[1], |
1846 | base_specs.spec_cnt, comp_specs.spec_cnt); |
1847 | return -EINVAL; |
1848 | } |
1849 | for (i = 0; i < base_specs.spec_cnt; i++) { |
1850 | if (base_specs.ids[i] != comp_specs.ids[i]) { |
1851 | fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n" , |
1852 | env.filenames[0], env.filenames[1], |
1853 | stat_defs[base_specs.ids[i]].names[0], |
1854 | stat_defs[comp_specs.ids[i]].names[0]); |
1855 | return -EINVAL; |
1856 | } |
1857 | } |
1858 | |
1859 | /* Replace user-specified sorting spec with file+prog sorting rule to |
1860 | * be able to join two datasets correctly. Once we are done, we will |
1861 | * restore the original sort spec. |
1862 | */ |
1863 | tmp_sort_spec = env.sort_spec; |
1864 | env.sort_spec = join_sort_spec; |
1865 | qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); |
1866 | qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats); |
1867 | env.sort_spec = tmp_sort_spec; |
1868 | |
1869 | /* Join two datasets together. If baseline and comparison datasets |
1870 | * have different subset of rows (we match by 'object + prog' as |
1871 | * a unique key) then assume empty/missing/zero value for rows that |
1872 | * are missing in the opposite data set. |
1873 | */ |
1874 | i = j = 0; |
1875 | while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) { |
1876 | const struct verif_stats *base, *comp; |
1877 | struct verif_stats_join *join; |
1878 | void *tmp; |
1879 | int r; |
1880 | |
1881 | base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats; |
1882 | comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats; |
1883 | |
1884 | if (!base->file_name || !base->prog_name) { |
1885 | fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n" , |
1886 | i, env.filenames[0]); |
1887 | return -EINVAL; |
1888 | } |
1889 | if (!comp->file_name || !comp->prog_name) { |
1890 | fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n" , |
1891 | j, env.filenames[1]); |
1892 | return -EINVAL; |
1893 | } |
1894 | |
1895 | tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats)); |
1896 | if (!tmp) |
1897 | return -ENOMEM; |
1898 | env.join_stats = tmp; |
1899 | |
1900 | join = &env.join_stats[env.join_stat_cnt]; |
1901 | memset(join, 0, sizeof(*join)); |
1902 | |
1903 | r = cmp_stats_key(base, comp); |
1904 | if (r == 0) { |
1905 | join->file_name = base->file_name; |
1906 | join->prog_name = base->prog_name; |
1907 | join->stats_a = base; |
1908 | join->stats_b = comp; |
1909 | i++; |
1910 | j++; |
1911 | } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) { |
1912 | join->file_name = base->file_name; |
1913 | join->prog_name = base->prog_name; |
1914 | join->stats_a = base; |
1915 | join->stats_b = NULL; |
1916 | i++; |
1917 | } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) { |
1918 | join->file_name = comp->file_name; |
1919 | join->prog_name = comp->prog_name; |
1920 | join->stats_a = NULL; |
1921 | join->stats_b = comp; |
1922 | j++; |
1923 | } else { |
1924 | fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i" , |
1925 | __FILE__, __LINE__, i, j); |
1926 | return -EINVAL; |
1927 | } |
1928 | env.join_stat_cnt += 1; |
1929 | } |
1930 | |
1931 | /* now sort joined results according to sort spec */ |
1932 | qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); |
1933 | |
1934 | /* for human-readable table output we need to do extra pass to |
1935 | * calculate column widths, so we substitute current output format |
1936 | * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE |
1937 | * and do everything again. |
1938 | */ |
1939 | if (env.out_fmt == RESFMT_TABLE) |
1940 | cur_fmt = RESFMT_TABLE_CALCLEN; |
1941 | else |
1942 | cur_fmt = env.out_fmt; |
1943 | |
1944 | one_more_time: |
1945 | output_comp_headers(fmt: cur_fmt); |
1946 | |
1947 | last_idx = -1; |
1948 | cnt = 0; |
1949 | for (i = 0; i < env.join_stat_cnt; i++) { |
1950 | const struct verif_stats_join *join = &env.join_stats[i]; |
1951 | |
1952 | if (!should_output_join_stats(join)) |
1953 | continue; |
1954 | |
1955 | if (env.top_n && cnt >= env.top_n) |
1956 | break; |
1957 | |
1958 | if (cur_fmt == RESFMT_TABLE_CALCLEN) |
1959 | last_idx = i; |
1960 | |
1961 | output_comp_stats(join, cur_fmt, i == last_idx); |
1962 | |
1963 | cnt++; |
1964 | } |
1965 | |
1966 | if (cur_fmt == RESFMT_TABLE_CALCLEN) { |
1967 | cur_fmt = RESFMT_TABLE; |
1968 | goto one_more_time; /* ... this time with feeling */ |
1969 | } |
1970 | |
1971 | return 0; |
1972 | } |
1973 | |
1974 | static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats) |
1975 | { |
1976 | long value = stats->stats[f->stat_id]; |
1977 | |
1978 | if (f->abs) |
1979 | value = value < 0 ? -value : value; |
1980 | |
1981 | switch (f->op) { |
1982 | case OP_EQ: return value == f->value; |
1983 | case OP_NEQ: return value != f->value; |
1984 | case OP_LT: return value < f->value; |
1985 | case OP_LE: return value <= f->value; |
1986 | case OP_GT: return value > f->value; |
1987 | case OP_GE: return value >= f->value; |
1988 | } |
1989 | |
1990 | fprintf(stderr, "BUG: unknown filter op %d!\n" , f->op); |
1991 | return false; |
1992 | } |
1993 | |
1994 | static bool should_output_stats(const struct verif_stats *stats) |
1995 | { |
1996 | struct filter *f; |
1997 | int i, allow_cnt = 0; |
1998 | |
1999 | for (i = 0; i < env.deny_filter_cnt; i++) { |
2000 | f = &env.deny_filters[i]; |
2001 | if (f->kind != FILTER_STAT) |
2002 | continue; |
2003 | |
2004 | if (is_stat_filter_matched(f, stats)) |
2005 | return false; |
2006 | } |
2007 | |
2008 | for (i = 0; i < env.allow_filter_cnt; i++) { |
2009 | f = &env.allow_filters[i]; |
2010 | if (f->kind != FILTER_STAT) |
2011 | continue; |
2012 | allow_cnt++; |
2013 | |
2014 | if (is_stat_filter_matched(f, stats)) |
2015 | return true; |
2016 | } |
2017 | |
2018 | /* if there are no stat allowed filters, pass everything through */ |
2019 | return allow_cnt == 0; |
2020 | } |
2021 | |
2022 | static void output_prog_stats(void) |
2023 | { |
2024 | const struct verif_stats *stats; |
2025 | int i, last_stat_idx = 0, cnt = 0; |
2026 | |
2027 | if (env.out_fmt == RESFMT_TABLE) { |
2028 | /* calculate column widths */ |
2029 | output_headers(fmt: RESFMT_TABLE_CALCLEN); |
2030 | for (i = 0; i < env.prog_stat_cnt; i++) { |
2031 | stats = &env.prog_stats[i]; |
2032 | if (!should_output_stats(stats)) |
2033 | continue; |
2034 | output_stats(stats, RESFMT_TABLE_CALCLEN, false); |
2035 | last_stat_idx = i; |
2036 | } |
2037 | } |
2038 | |
2039 | /* actually output the table */ |
2040 | output_headers(fmt: env.out_fmt); |
2041 | for (i = 0; i < env.prog_stat_cnt; i++) { |
2042 | stats = &env.prog_stats[i]; |
2043 | if (!should_output_stats(stats)) |
2044 | continue; |
2045 | if (env.top_n && cnt >= env.top_n) |
2046 | break; |
2047 | output_stats(stats, env.out_fmt, i == last_stat_idx); |
2048 | cnt++; |
2049 | } |
2050 | } |
2051 | |
2052 | static int handle_verif_mode(void) |
2053 | { |
2054 | int i, err; |
2055 | |
2056 | if (env.filename_cnt == 0) { |
2057 | fprintf(stderr, "Please provide path to BPF object file!\n\n" ); |
2058 | argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat" ); |
2059 | return -EINVAL; |
2060 | } |
2061 | |
2062 | for (i = 0; i < env.filename_cnt; i++) { |
2063 | err = process_obj(filename: env.filenames[i]); |
2064 | if (err) { |
2065 | fprintf(stderr, "Failed to process '%s': %d\n" , env.filenames[i], err); |
2066 | return err; |
2067 | } |
2068 | } |
2069 | |
2070 | qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); |
2071 | |
2072 | output_prog_stats(); |
2073 | |
2074 | return 0; |
2075 | } |
2076 | |
2077 | static int handle_replay_mode(void) |
2078 | { |
2079 | struct stat_specs specs = {}; |
2080 | int err; |
2081 | |
2082 | if (env.filename_cnt != 1) { |
2083 | fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n" ); |
2084 | argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat" ); |
2085 | return -EINVAL; |
2086 | } |
2087 | |
2088 | err = parse_stats_csv(filename: env.filenames[0], specs: &specs, |
2089 | statsp: &env.prog_stats, stat_cntp: &env.prog_stat_cnt); |
2090 | if (err) { |
2091 | fprintf(stderr, "Failed to parse stats from '%s': %d\n" , env.filenames[0], err); |
2092 | return err; |
2093 | } |
2094 | |
2095 | qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); |
2096 | |
2097 | output_prog_stats(); |
2098 | |
2099 | return 0; |
2100 | } |
2101 | |
2102 | int main(int argc, char **argv) |
2103 | { |
2104 | int err = 0, i; |
2105 | |
2106 | if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) |
2107 | return 1; |
2108 | |
2109 | if (env.show_version) { |
2110 | printf("%s\n" , argp_program_version); |
2111 | return 0; |
2112 | } |
2113 | |
2114 | if (env.verbose && env.quiet) { |
2115 | fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n" ); |
2116 | argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat" ); |
2117 | return 1; |
2118 | } |
2119 | if (env.verbose && env.log_level == 0) |
2120 | env.log_level = 1; |
2121 | |
2122 | if (env.output_spec.spec_cnt == 0) { |
2123 | if (env.out_fmt == RESFMT_CSV) |
2124 | env.output_spec = default_csv_output_spec; |
2125 | else |
2126 | env.output_spec = default_output_spec; |
2127 | } |
2128 | if (env.sort_spec.spec_cnt == 0) |
2129 | env.sort_spec = default_sort_spec; |
2130 | |
2131 | if (env.comparison_mode && env.replay_mode) { |
2132 | fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n" ); |
2133 | argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat" ); |
2134 | return 1; |
2135 | } |
2136 | |
2137 | if (env.comparison_mode) |
2138 | err = handle_comparison_mode(); |
2139 | else if (env.replay_mode) |
2140 | err = handle_replay_mode(); |
2141 | else |
2142 | err = handle_verif_mode(); |
2143 | |
2144 | free_verif_stats(env.prog_stats, env.prog_stat_cnt); |
2145 | free_verif_stats(env.baseline_stats, env.baseline_stat_cnt); |
2146 | free(env.join_stats); |
2147 | for (i = 0; i < env.filename_cnt; i++) |
2148 | free(env.filenames[i]); |
2149 | free(env.filenames); |
2150 | for (i = 0; i < env.allow_filter_cnt; i++) { |
2151 | free(env.allow_filters[i].any_glob); |
2152 | free(env.allow_filters[i].file_glob); |
2153 | free(env.allow_filters[i].prog_glob); |
2154 | } |
2155 | free(env.allow_filters); |
2156 | for (i = 0; i < env.deny_filter_cnt; i++) { |
2157 | free(env.deny_filters[i].any_glob); |
2158 | free(env.deny_filters[i].file_glob); |
2159 | free(env.deny_filters[i].prog_glob); |
2160 | } |
2161 | free(env.deny_filters); |
2162 | return -err; |
2163 | } |
2164 | |