1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
 * Test module for stressing and analyzing the performance of the vmalloc allocator.
5 | * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> |
6 | */ |
7 | #include <linux/init.h> |
8 | #include <linux/kernel.h> |
9 | #include <linux/module.h> |
10 | #include <linux/vmalloc.h> |
11 | #include <linux/random.h> |
12 | #include <linux/kthread.h> |
13 | #include <linux/moduleparam.h> |
14 | #include <linux/completion.h> |
15 | #include <linux/delay.h> |
16 | #include <linux/rwsem.h> |
17 | #include <linux/mm.h> |
18 | #include <linux/rcupdate.h> |
19 | #include <linux/slab.h> |
20 | |
/*
 * Declare a read-only (0444) module parameter in one step: define the
 * backing static variable with its default value, register it through
 * module_param() and attach the description string.
 *
 * The expansion ends without a semicolon so that each use site is
 * terminated like an ordinary statement. The last line of the macro
 * carries no line continuation, so the definition does not depend on
 * being followed by a blank line.
 */
#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)
25 | |
26 | __param(int, nr_threads, 0, |
27 | "Number of workers to perform tests(min: 1 max: USHRT_MAX)" ); |
28 | |
29 | __param(bool, sequential_test_order, false, |
30 | "Use sequential stress tests order" ); |
31 | |
32 | __param(int, test_repeat_count, 1, |
33 | "Set test repeat counter" ); |
34 | |
35 | __param(int, test_loop_count, 1000000, |
36 | "Set test loop counter" ); |
37 | |
38 | __param(int, nr_pages, 0, |
39 | "Set number of pages for fix_size_alloc_test(default: 1)" ); |
40 | |
41 | __param(bool, use_huge, false, |
42 | "Use vmalloc_huge in fix_size_alloc_test" ); |
43 | |
44 | __param(int, run_test_mask, INT_MAX, |
45 | "Set tests specified in the mask.\n\n" |
46 | "\t\tid: 1, name: fix_size_alloc_test\n" |
47 | "\t\tid: 2, name: full_fit_alloc_test\n" |
48 | "\t\tid: 4, name: long_busy_list_alloc_test\n" |
49 | "\t\tid: 8, name: random_size_alloc_test\n" |
50 | "\t\tid: 16, name: fix_align_alloc_test\n" |
51 | "\t\tid: 32, name: random_size_align_alloc_test\n" |
52 | "\t\tid: 64, name: align_shift_alloc_test\n" |
53 | "\t\tid: 128, name: pcpu_alloc_test\n" |
54 | "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" |
55 | "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" |
56 | "\t\tid: 1024, name: vm_map_ram_test\n" |
57 | /* Add a new test case description here. */ |
58 | ); |
59 | |
60 | /* |
61 | * Read write semaphore for synchronization of setup |
62 | * phase that is done in main thread and workers. |
63 | */ |
64 | static DECLARE_RWSEM(prepare_for_test_rwsem); |
65 | |
66 | /* |
67 | * Completion tracking for worker threads. |
68 | */ |
69 | static DECLARE_COMPLETION(test_all_done_comp); |
70 | static atomic_t test_n_undone = ATOMIC_INIT(0); |
71 | |
72 | static inline void |
73 | test_report_one_done(void) |
74 | { |
75 | if (atomic_dec_and_test(v: &test_n_undone)) |
76 | complete(&test_all_done_comp); |
77 | } |
78 | |
79 | static int random_size_align_alloc_test(void) |
80 | { |
81 | unsigned long size, align; |
82 | unsigned int rnd; |
83 | void *ptr; |
84 | int i; |
85 | |
86 | for (i = 0; i < test_loop_count; i++) { |
87 | rnd = get_random_u8(); |
88 | |
89 | /* |
90 | * Maximum 1024 pages, if PAGE_SIZE is 4096. |
91 | */ |
92 | align = 1 << (rnd % 23); |
93 | |
94 | /* |
95 | * Maximum 10 pages. |
96 | */ |
97 | size = ((rnd % 10) + 1) * PAGE_SIZE; |
98 | |
99 | ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, node: 0, |
100 | caller: __builtin_return_address(0)); |
101 | if (!ptr) |
102 | return -1; |
103 | |
104 | vfree(addr: ptr); |
105 | } |
106 | |
107 | return 0; |
108 | } |
109 | |
110 | /* |
111 | * This test case is supposed to be failed. |
112 | */ |
113 | static int align_shift_alloc_test(void) |
114 | { |
115 | unsigned long align; |
116 | void *ptr; |
117 | int i; |
118 | |
119 | for (i = 0; i < BITS_PER_LONG; i++) { |
120 | align = ((unsigned long) 1) << i; |
121 | |
122 | ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, node: 0, |
123 | caller: __builtin_return_address(0)); |
124 | if (!ptr) |
125 | return -1; |
126 | |
127 | vfree(addr: ptr); |
128 | } |
129 | |
130 | return 0; |
131 | } |
132 | |
133 | static int fix_align_alloc_test(void) |
134 | { |
135 | void *ptr; |
136 | int i; |
137 | |
138 | for (i = 0; i < test_loop_count; i++) { |
139 | ptr = __vmalloc_node(size: 5 * PAGE_SIZE, THREAD_ALIGN << 1, |
140 | GFP_KERNEL | __GFP_ZERO, node: 0, |
141 | caller: __builtin_return_address(0)); |
142 | if (!ptr) |
143 | return -1; |
144 | |
145 | vfree(addr: ptr); |
146 | } |
147 | |
148 | return 0; |
149 | } |
150 | |
151 | static int random_size_alloc_test(void) |
152 | { |
153 | unsigned int n; |
154 | void *p; |
155 | int i; |
156 | |
157 | for (i = 0; i < test_loop_count; i++) { |
158 | n = get_random_u32_inclusive(floor: 1, ceil: 100); |
159 | p = vmalloc(size: n * PAGE_SIZE); |
160 | |
161 | if (!p) |
162 | return -1; |
163 | |
164 | *((__u8 *)p) = 1; |
165 | vfree(addr: p); |
166 | } |
167 | |
168 | return 0; |
169 | } |
170 | |
171 | static int long_busy_list_alloc_test(void) |
172 | { |
173 | void *ptr_1, *ptr_2; |
174 | void **ptr; |
175 | int rv = -1; |
176 | int i; |
177 | |
178 | ptr = vmalloc(size: sizeof(void *) * 15000); |
179 | if (!ptr) |
180 | return rv; |
181 | |
182 | for (i = 0; i < 15000; i++) |
183 | ptr[i] = vmalloc(size: 1 * PAGE_SIZE); |
184 | |
185 | for (i = 0; i < test_loop_count; i++) { |
186 | ptr_1 = vmalloc(size: 100 * PAGE_SIZE); |
187 | if (!ptr_1) |
188 | goto leave; |
189 | |
190 | ptr_2 = vmalloc(size: 1 * PAGE_SIZE); |
191 | if (!ptr_2) { |
192 | vfree(addr: ptr_1); |
193 | goto leave; |
194 | } |
195 | |
196 | *((__u8 *)ptr_1) = 0; |
197 | *((__u8 *)ptr_2) = 1; |
198 | |
199 | vfree(addr: ptr_1); |
200 | vfree(addr: ptr_2); |
201 | } |
202 | |
203 | /* Success */ |
204 | rv = 0; |
205 | |
206 | leave: |
207 | for (i = 0; i < 15000; i++) |
208 | vfree(addr: ptr[i]); |
209 | |
210 | vfree(addr: ptr); |
211 | return rv; |
212 | } |
213 | |
214 | static int full_fit_alloc_test(void) |
215 | { |
216 | void **ptr, **junk_ptr, *tmp; |
217 | int junk_length; |
218 | int rv = -1; |
219 | int i; |
220 | |
221 | junk_length = fls(x: num_online_cpus()); |
222 | junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); |
223 | |
224 | ptr = vmalloc(size: sizeof(void *) * junk_length); |
225 | if (!ptr) |
226 | return rv; |
227 | |
228 | junk_ptr = vmalloc(size: sizeof(void *) * junk_length); |
229 | if (!junk_ptr) { |
230 | vfree(addr: ptr); |
231 | return rv; |
232 | } |
233 | |
234 | for (i = 0; i < junk_length; i++) { |
235 | ptr[i] = vmalloc(size: 1 * PAGE_SIZE); |
236 | junk_ptr[i] = vmalloc(size: 1 * PAGE_SIZE); |
237 | } |
238 | |
239 | for (i = 0; i < junk_length; i++) |
240 | vfree(addr: junk_ptr[i]); |
241 | |
242 | for (i = 0; i < test_loop_count; i++) { |
243 | tmp = vmalloc(size: 1 * PAGE_SIZE); |
244 | |
245 | if (!tmp) |
246 | goto error; |
247 | |
248 | *((__u8 *)tmp) = 1; |
249 | vfree(addr: tmp); |
250 | } |
251 | |
252 | /* Success */ |
253 | rv = 0; |
254 | |
255 | error: |
256 | for (i = 0; i < junk_length; i++) |
257 | vfree(addr: ptr[i]); |
258 | |
259 | vfree(addr: ptr); |
260 | vfree(addr: junk_ptr); |
261 | |
262 | return rv; |
263 | } |
264 | |
265 | static int fix_size_alloc_test(void) |
266 | { |
267 | void *ptr; |
268 | int i; |
269 | |
270 | for (i = 0; i < test_loop_count; i++) { |
271 | if (use_huge) |
272 | ptr = vmalloc_huge(size: (nr_pages > 0 ? nr_pages:1) * PAGE_SIZE, GFP_KERNEL); |
273 | else |
274 | ptr = vmalloc(size: (nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); |
275 | |
276 | if (!ptr) |
277 | return -1; |
278 | |
279 | *((__u8 *)ptr) = 0; |
280 | |
281 | vfree(addr: ptr); |
282 | } |
283 | |
284 | return 0; |
285 | } |
286 | |
287 | static int |
288 | pcpu_alloc_test(void) |
289 | { |
290 | int rv = 0; |
291 | #ifndef CONFIG_NEED_PER_CPU_KM |
292 | void __percpu **pcpu; |
293 | size_t size, align; |
294 | int i; |
295 | |
296 | pcpu = vmalloc(size: sizeof(void __percpu *) * 35000); |
297 | if (!pcpu) |
298 | return -1; |
299 | |
300 | for (i = 0; i < 35000; i++) { |
301 | size = get_random_u32_inclusive(floor: 1, PAGE_SIZE / 4); |
302 | |
303 | /* |
304 | * Maximum PAGE_SIZE |
305 | */ |
306 | align = 1 << get_random_u32_inclusive(floor: 1, ceil: 11); |
307 | |
308 | pcpu[i] = __alloc_percpu(size, align); |
309 | if (!pcpu[i]) |
310 | rv = -1; |
311 | } |
312 | |
313 | for (i = 0; i < 35000; i++) |
314 | free_percpu(pdata: pcpu[i]); |
315 | |
316 | vfree(addr: pcpu); |
317 | #endif |
318 | return rv; |
319 | } |
320 | |
321 | struct test_kvfree_rcu { |
322 | struct rcu_head rcu; |
323 | unsigned char array[20]; |
324 | }; |
325 | |
326 | static int |
327 | kvfree_rcu_1_arg_vmalloc_test(void) |
328 | { |
329 | struct test_kvfree_rcu *p; |
330 | int i; |
331 | |
332 | for (i = 0; i < test_loop_count; i++) { |
333 | p = vmalloc(size: 1 * PAGE_SIZE); |
334 | if (!p) |
335 | return -1; |
336 | |
337 | p->array[0] = 'a'; |
338 | kvfree_rcu_mightsleep(p); |
339 | } |
340 | |
341 | return 0; |
342 | } |
343 | |
344 | static int |
345 | kvfree_rcu_2_arg_vmalloc_test(void) |
346 | { |
347 | struct test_kvfree_rcu *p; |
348 | int i; |
349 | |
350 | for (i = 0; i < test_loop_count; i++) { |
351 | p = vmalloc(size: 1 * PAGE_SIZE); |
352 | if (!p) |
353 | return -1; |
354 | |
355 | p->array[0] = 'a'; |
356 | kvfree_rcu(p, rcu); |
357 | } |
358 | |
359 | return 0; |
360 | } |
361 | |
362 | static int |
363 | vm_map_ram_test(void) |
364 | { |
365 | unsigned long nr_allocated; |
366 | unsigned int map_nr_pages; |
367 | unsigned char *v_ptr; |
368 | struct page **pages; |
369 | int i; |
370 | |
371 | map_nr_pages = nr_pages > 0 ? nr_pages:1; |
372 | pages = kcalloc(n: map_nr_pages, size: sizeof(struct page *), GFP_KERNEL); |
373 | if (!pages) |
374 | return -1; |
375 | |
376 | nr_allocated = alloc_pages_bulk_array(GFP_KERNEL, nr_pages: map_nr_pages, page_array: pages); |
377 | if (nr_allocated != map_nr_pages) |
378 | goto cleanup; |
379 | |
380 | /* Run the test loop. */ |
381 | for (i = 0; i < test_loop_count; i++) { |
382 | v_ptr = vm_map_ram(pages, count: map_nr_pages, NUMA_NO_NODE); |
383 | *v_ptr = 'a'; |
384 | vm_unmap_ram(mem: v_ptr, count: map_nr_pages); |
385 | } |
386 | |
387 | cleanup: |
388 | for (i = 0; i < nr_allocated; i++) |
389 | __free_page(pages[i]); |
390 | |
391 | kfree(objp: pages); |
392 | |
393 | /* 0 indicates success. */ |
394 | return nr_allocated != map_nr_pages; |
395 | } |
396 | |
/* One entry of the test table: a printable name and the test routine. */
struct test_case_desc {
	const char *test_name;		/* printed in the summary line */
	int (*test_func)(void);		/* returns 0 when the test passed */
};
401 | |
402 | static struct test_case_desc test_case_array[] = { |
403 | { "fix_size_alloc_test" , fix_size_alloc_test }, |
404 | { "full_fit_alloc_test" , full_fit_alloc_test }, |
405 | { "long_busy_list_alloc_test" , long_busy_list_alloc_test }, |
406 | { "random_size_alloc_test" , random_size_alloc_test }, |
407 | { "fix_align_alloc_test" , fix_align_alloc_test }, |
408 | { "random_size_align_alloc_test" , random_size_align_alloc_test }, |
409 | { "align_shift_alloc_test" , align_shift_alloc_test }, |
410 | { "pcpu_alloc_test" , pcpu_alloc_test }, |
411 | { "kvfree_rcu_1_arg_vmalloc_test" , kvfree_rcu_1_arg_vmalloc_test }, |
412 | { "kvfree_rcu_2_arg_vmalloc_test" , kvfree_rcu_2_arg_vmalloc_test }, |
413 | { "vm_map_ram_test" , vm_map_ram_test }, |
414 | /* Add a new test case here. */ |
415 | }; |
416 | |
417 | struct test_case_data { |
418 | int test_failed; |
419 | int test_passed; |
420 | u64 time; |
421 | }; |
422 | |
423 | static struct test_driver { |
424 | struct task_struct *task; |
425 | struct test_case_data data[ARRAY_SIZE(test_case_array)]; |
426 | |
427 | unsigned long start; |
428 | unsigned long stop; |
429 | } *tdriver; |
430 | |
/*
 * Randomly permute the first n elements of arr in place.
 *
 * Walks from the last slot down to slot 1 and swaps each slot with a
 * randomly chosen slot strictly below it, so an element is never
 * swapped with itself.
 */
static void shuffle_array(int *arr, int n)
{
	int last;

	for (last = n - 1; last > 0; last--) {
		/* Partner index is drawn from [0, last). */
		int pick = get_random_u32_below(last);

		swap(arr[last], arr[pick]);
	}
}
443 | |
444 | static int test_func(void *private) |
445 | { |
446 | struct test_driver *t = private; |
447 | int random_array[ARRAY_SIZE(test_case_array)]; |
448 | int index, i, j; |
449 | ktime_t kt; |
450 | u64 delta; |
451 | |
452 | for (i = 0; i < ARRAY_SIZE(test_case_array); i++) |
453 | random_array[i] = i; |
454 | |
455 | if (!sequential_test_order) |
456 | shuffle_array(arr: random_array, ARRAY_SIZE(test_case_array)); |
457 | |
458 | /* |
459 | * Block until initialization is done. |
460 | */ |
461 | down_read(sem: &prepare_for_test_rwsem); |
462 | |
463 | t->start = get_cycles(); |
464 | for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { |
465 | index = random_array[i]; |
466 | |
467 | /* |
468 | * Skip tests if run_test_mask has been specified. |
469 | */ |
470 | if (!((run_test_mask & (1 << index)) >> index)) |
471 | continue; |
472 | |
473 | kt = ktime_get(); |
474 | for (j = 0; j < test_repeat_count; j++) { |
475 | if (!test_case_array[index].test_func()) |
476 | t->data[index].test_passed++; |
477 | else |
478 | t->data[index].test_failed++; |
479 | } |
480 | |
481 | /* |
482 | * Take an average time that test took. |
483 | */ |
484 | delta = (u64) ktime_us_delta(later: ktime_get(), earlier: kt); |
485 | do_div(delta, (u32) test_repeat_count); |
486 | |
487 | t->data[index].time = delta; |
488 | } |
489 | t->stop = get_cycles(); |
490 | |
491 | up_read(sem: &prepare_for_test_rwsem); |
492 | test_report_one_done(); |
493 | |
494 | /* |
495 | * Wait for the kthread_stop() call. |
496 | */ |
497 | while (!kthread_should_stop()) |
498 | msleep(msecs: 10); |
499 | |
500 | return 0; |
501 | } |
502 | |
503 | static int |
504 | init_test_configurtion(void) |
505 | { |
506 | /* |
507 | * A maximum number of workers is defined as hard-coded |
508 | * value and set to USHRT_MAX. We add such gap just in |
509 | * case and for potential heavy stressing. |
510 | */ |
511 | nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX); |
512 | |
513 | /* Allocate the space for test instances. */ |
514 | tdriver = kvcalloc(n: nr_threads, size: sizeof(*tdriver), GFP_KERNEL); |
515 | if (tdriver == NULL) |
516 | return -1; |
517 | |
518 | if (test_repeat_count <= 0) |
519 | test_repeat_count = 1; |
520 | |
521 | if (test_loop_count <= 0) |
522 | test_loop_count = 1; |
523 | |
524 | return 0; |
525 | } |
526 | |
527 | static void do_concurrent_test(void) |
528 | { |
529 | int i, ret; |
530 | |
531 | /* |
532 | * Set some basic configurations plus sanity check. |
533 | */ |
534 | ret = init_test_configurtion(); |
535 | if (ret < 0) |
536 | return; |
537 | |
538 | /* |
539 | * Put on hold all workers. |
540 | */ |
541 | down_write(sem: &prepare_for_test_rwsem); |
542 | |
543 | for (i = 0; i < nr_threads; i++) { |
544 | struct test_driver *t = &tdriver[i]; |
545 | |
546 | t->task = kthread_run(test_func, t, "vmalloc_test/%d" , i); |
547 | |
548 | if (!IS_ERR(ptr: t->task)) |
549 | /* Success. */ |
550 | atomic_inc(v: &test_n_undone); |
551 | else |
552 | pr_err("Failed to start %d kthread\n" , i); |
553 | } |
554 | |
555 | /* |
556 | * Now let the workers do their job. |
557 | */ |
558 | up_write(sem: &prepare_for_test_rwsem); |
559 | |
560 | /* |
561 | * Sleep quiet until all workers are done with 1 second |
562 | * interval. Since the test can take a lot of time we |
563 | * can run into a stack trace of the hung task. That is |
564 | * why we go with completion_timeout and HZ value. |
565 | */ |
566 | do { |
567 | ret = wait_for_completion_timeout(x: &test_all_done_comp, HZ); |
568 | } while (!ret); |
569 | |
570 | for (i = 0; i < nr_threads; i++) { |
571 | struct test_driver *t = &tdriver[i]; |
572 | int j; |
573 | |
574 | if (!IS_ERR(ptr: t->task)) |
575 | kthread_stop(k: t->task); |
576 | |
577 | for (j = 0; j < ARRAY_SIZE(test_case_array); j++) { |
578 | if (!((run_test_mask & (1 << j)) >> j)) |
579 | continue; |
580 | |
581 | pr_info( |
582 | "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n" , |
583 | test_case_array[j].test_name, |
584 | t->data[j].test_passed, |
585 | t->data[j].test_failed, |
586 | test_repeat_count, test_loop_count, |
587 | t->data[j].time); |
588 | } |
589 | |
590 | pr_info("All test took worker%d=%lu cycles\n" , |
591 | i, t->stop - t->start); |
592 | } |
593 | |
594 | kvfree(addr: tdriver); |
595 | } |
596 | |
597 | static int vmalloc_test_init(void) |
598 | { |
599 | do_concurrent_test(); |
600 | return -EAGAIN; /* Fail will directly unload the module */ |
601 | } |
602 | |
/* Module exit hook; intentionally empty. */
static void vmalloc_test_exit(void)
{
	/* Nothing to do here. */
}
606 | |
607 | module_init(vmalloc_test_init) |
608 | module_exit(vmalloc_test_exit) |
609 | |
610 | MODULE_LICENSE("GPL" ); |
611 | MODULE_AUTHOR("Uladzislau Rezki" ); |
612 | MODULE_DESCRIPTION("vmalloc test module" ); |
613 | |