| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ |
| 3 | |
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <pthread.h>
#include <argp.h>

#include "bench.h"
#include "bench_local_storage_create.skel.h"
| 11 | |
/*
 * Per-producer scratch arrays.  setup() allocates batch_sz entries for
 * either fds (socket storage) or pthds + pthd_results (task storage);
 * the members of the unused mode stay NULL from calloc().
 */
struct thread {
	int *fds;		/* descriptors returned by socket() */
	pthread_t *pthds;	/* handles filled in by pthread_create() */
	int *pthd_results;	/* pthread_create() return code per slot */
};
| 17 | |
| 18 | static struct bench_local_storage_create *skel; |
| 19 | static struct thread *threads; |
| 20 | static long create_owner_errs; |
| 21 | static int storage_type = BPF_MAP_TYPE_SK_STORAGE; |
| 22 | static int batch_sz = 32; |
| 23 | |
/* argp keys for the options this benchmark adds. */
enum {
	ARG_BATCH_SZ = 9000,
	ARG_STORAGE_TYPE,	/* 9001 */
};
| 28 | |
| 29 | static const struct argp_option opts[] = { |
| 30 | { "batch-size" , ARG_BATCH_SZ, "BATCH_SIZE" , 0, |
| 31 | "The number of storage creations in each batch" }, |
| 32 | { "storage-type" , ARG_STORAGE_TYPE, "STORAGE_TYPE" , 0, |
| 33 | "The type of local storage to test (socket or task)" }, |
| 34 | {}, |
| 35 | }; |
| 36 | |
| 37 | static error_t parse_arg(int key, char *arg, struct argp_state *state) |
| 38 | { |
| 39 | int ret; |
| 40 | |
| 41 | switch (key) { |
| 42 | case ARG_BATCH_SZ: |
| 43 | ret = atoi(arg); |
| 44 | if (ret < 1) { |
| 45 | fprintf(stderr, "invalid batch-size\n" ); |
| 46 | argp_usage(state); |
| 47 | } |
| 48 | batch_sz = ret; |
| 49 | break; |
| 50 | case ARG_STORAGE_TYPE: |
| 51 | if (!strcmp(arg, "task" )) { |
| 52 | storage_type = BPF_MAP_TYPE_TASK_STORAGE; |
| 53 | } else if (!strcmp(arg, "socket" )) { |
| 54 | storage_type = BPF_MAP_TYPE_SK_STORAGE; |
| 55 | } else { |
| 56 | fprintf(stderr, "invalid storage-type (socket or task)\n" ); |
| 57 | argp_usage(state); |
| 58 | } |
| 59 | break; |
| 60 | default: |
| 61 | return ARGP_ERR_UNKNOWN; |
| 62 | } |
| 63 | |
| 64 | return 0; |
| 65 | } |
| 66 | |
| 67 | const struct argp bench_local_storage_create_argp = { |
| 68 | .options = opts, |
| 69 | .parser = parse_arg, |
| 70 | }; |
| 71 | |
| 72 | static void validate(void) |
| 73 | { |
| 74 | if (env.consumer_cnt != 0) { |
| 75 | fprintf(stderr, |
| 76 | "local-storage-create benchmark does not need consumer\n" ); |
| 77 | exit(1); |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | static void setup(void) |
| 82 | { |
| 83 | int i; |
| 84 | |
| 85 | skel = bench_local_storage_create__open_and_load(); |
| 86 | if (!skel) { |
| 87 | fprintf(stderr, "error loading skel\n" ); |
| 88 | exit(1); |
| 89 | } |
| 90 | |
| 91 | skel->bss->bench_pid = getpid(); |
| 92 | if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { |
| 93 | if (!bpf_program__attach(skel->progs.socket_post_create)) { |
| 94 | fprintf(stderr, "Error attaching bpf program\n" ); |
| 95 | exit(1); |
| 96 | } |
| 97 | } else { |
| 98 | if (!bpf_program__attach(skel->progs.sched_process_fork)) { |
| 99 | fprintf(stderr, "Error attaching bpf program\n" ); |
| 100 | exit(1); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | if (!bpf_program__attach(skel->progs.kmalloc)) { |
| 105 | fprintf(stderr, "Error attaching bpf program\n" ); |
| 106 | exit(1); |
| 107 | } |
| 108 | |
| 109 | threads = calloc(env.producer_cnt, sizeof(*threads)); |
| 110 | |
| 111 | if (!threads) { |
| 112 | fprintf(stderr, "cannot alloc thread_res\n" ); |
| 113 | exit(1); |
| 114 | } |
| 115 | |
| 116 | for (i = 0; i < env.producer_cnt; i++) { |
| 117 | struct thread *t = &threads[i]; |
| 118 | |
| 119 | if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { |
| 120 | t->fds = malloc(batch_sz * sizeof(*t->fds)); |
| 121 | if (!t->fds) { |
| 122 | fprintf(stderr, "cannot alloc t->fds\n" ); |
| 123 | exit(1); |
| 124 | } |
| 125 | } else { |
| 126 | t->pthds = malloc(batch_sz * sizeof(*t->pthds)); |
| 127 | if (!t->pthds) { |
| 128 | fprintf(stderr, "cannot alloc t->pthds\n" ); |
| 129 | exit(1); |
| 130 | } |
| 131 | t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results)); |
| 132 | if (!t->pthd_results) { |
| 133 | fprintf(stderr, "cannot alloc t->pthd_results\n" ); |
| 134 | exit(1); |
| 135 | } |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | static void measure(struct bench_res *res) |
| 141 | { |
| 142 | res->hits = atomic_swap(&skel->bss->create_cnts, 0); |
| 143 | res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); |
| 144 | } |
| 145 | |
| 146 | static void *sk_producer(void *input) |
| 147 | { |
| 148 | struct thread *t = &threads[(long)(input)]; |
| 149 | int *fds = t->fds; |
| 150 | int i; |
| 151 | |
| 152 | while (true) { |
| 153 | for (i = 0; i < batch_sz; i++) { |
| 154 | fds[i] = socket(AF_INET6, SOCK_DGRAM, 0); |
| 155 | if (fds[i] == -1) |
| 156 | atomic_inc(&create_owner_errs); |
| 157 | } |
| 158 | |
| 159 | for (i = 0; i < batch_sz; i++) { |
| 160 | if (fds[i] != -1) |
| 161 | close(fds[i]); |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | return NULL; |
| 166 | } |
| 167 | |
/* Start routine for the threads spawned by task_producer(): returns at once. */
static void *thread_func(void *arg)
{
	(void)arg;

	return NULL;
}
| 172 | |
| 173 | static void *task_producer(void *input) |
| 174 | { |
| 175 | struct thread *t = &threads[(long)(input)]; |
| 176 | pthread_t *pthds = t->pthds; |
| 177 | int *pthd_results = t->pthd_results; |
| 178 | int i; |
| 179 | |
| 180 | while (true) { |
| 181 | for (i = 0; i < batch_sz; i++) { |
| 182 | pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL); |
| 183 | if (pthd_results[i]) |
| 184 | atomic_inc(&create_owner_errs); |
| 185 | } |
| 186 | |
| 187 | for (i = 0; i < batch_sz; i++) { |
| 188 | if (!pthd_results[i]) |
| 189 | pthread_join(pthds[i], NULL); |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | return NULL; |
| 194 | } |
| 195 | |
| 196 | static void *producer(void *input) |
| 197 | { |
| 198 | if (storage_type == BPF_MAP_TYPE_SK_STORAGE) |
| 199 | return sk_producer(input); |
| 200 | else |
| 201 | return task_producer(input); |
| 202 | } |
| 203 | |
| 204 | static void report_progress(int iter, struct bench_res *res, long delta_ns) |
| 205 | { |
| 206 | double creates_per_sec, kmallocs_per_create; |
| 207 | |
| 208 | creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0); |
| 209 | kmallocs_per_create = (double)res->drops / res->hits; |
| 210 | |
| 211 | printf("Iter %3d (%7.3lfus): " , |
| 212 | iter, (delta_ns - 1000000000) / 1000.0); |
| 213 | printf("creates %8.3lfk/s (%7.3lfk/prod), " , |
| 214 | creates_per_sec, creates_per_sec / env.producer_cnt); |
| 215 | printf("%3.2lf kmallocs/create\n" , kmallocs_per_create); |
| 216 | } |
| 217 | |
| 218 | static void report_final(struct bench_res res[], int res_cnt) |
| 219 | { |
| 220 | double creates_mean = 0.0, creates_stddev = 0.0; |
| 221 | long total_creates = 0, total_kmallocs = 0; |
| 222 | int i; |
| 223 | |
| 224 | for (i = 0; i < res_cnt; i++) { |
| 225 | creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); |
| 226 | total_creates += res[i].hits; |
| 227 | total_kmallocs += res[i].drops; |
| 228 | } |
| 229 | |
| 230 | if (res_cnt > 1) { |
| 231 | for (i = 0; i < res_cnt; i++) |
| 232 | creates_stddev += (creates_mean - res[i].hits / 1000.0) * |
| 233 | (creates_mean - res[i].hits / 1000.0) / |
| 234 | (res_cnt - 1.0); |
| 235 | creates_stddev = sqrt(creates_stddev); |
| 236 | } |
| 237 | printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), " , |
| 238 | creates_mean, creates_stddev, creates_mean / env.producer_cnt); |
| 239 | printf("%4.2lf kmallocs/create\n" , (double)total_kmallocs / total_creates); |
| 240 | if (create_owner_errs || skel->bss->create_errs) |
| 241 | printf("%s() errors %ld create_errs %ld\n" , |
| 242 | storage_type == BPF_MAP_TYPE_SK_STORAGE ? |
| 243 | "socket" : "pthread_create" , |
| 244 | create_owner_errs, |
| 245 | skel->bss->create_errs); |
| 246 | } |
| 247 | |
| 248 | /* Benchmark performance of creating bpf local storage */ |
| 249 | const struct bench bench_local_storage_create = { |
| 250 | .name = "local-storage-create" , |
| 251 | .argp = &bench_local_storage_create_argp, |
| 252 | .validate = validate, |
| 253 | .setup = setup, |
| 254 | .producer_thread = producer, |
| 255 | .measure = measure, |
| 256 | .report_progress = report_progress, |
| 257 | .report_final = report_final, |
| 258 | }; |
| 259 | |