1/* Copyright (C) 2002-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18#define _GNU_SOURCE 1
19#include <argp.h>
20#include <error.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
24#include <limits.h>
25#include <pthread.h>
26#include <signal.h>
27#include <stdbool.h>
28#include <stdlib.h>
29#include <string.h>
30#include <time.h>
31#include <unistd.h>
32#include <sys/param.h>
33#include <sys/types.h>
34
/* Largest value accepted for the --threads and --toplevel options.  Can be
   overridden on the compiler command line.  */
#if !defined MAX_THREADS
# define MAX_THREADS 100000
#endif

/* Initial value of the concurrent thread limit.  Can be overridden on the
   compiler command line.  */
#if !defined DEFAULT_THREADS
# define DEFAULT_THREADS 50
#endif
41
42
/* Keys for the options which only have a long name.  They are chosen
   outside the range of printable characters so argp does not treat them
   as short options.  */
#define OPT_TO_THREAD 300
#define OPT_TO_PROCESS 301
#define OPT_SYNC_SIGNAL 302
#define OPT_SYNC_JOIN 303
#define OPT_TOPLEVEL 304


/* Option table handed to argp.  Entries without a name and key are group
   headers whose text is shown in the --help output; the all-zero entry
   terminates the table.  */
static const struct argp_option options[] =
  {
    { NULL, 0, NULL, 0,
      "This is a test for threads so we allow the user to select the number "
      "of threads which are used at any one time. Independently the total "
      "number of rounds can be selected. This is the total number of "
      "threads which will have run when the process terminates:" },
    { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
    { "starts", 's', "NUMBER", 0, "Total number of working threads" },
    { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
      "Number of toplevel threads which start the other threads; this "
      "implies --sync-join" },

    { NULL, 0, NULL, 0,
      "Each thread can do one of two things: sleep or do work. The latter "
      "is 100% CPU bound. The work load is the probability a thread does "
      "work. All values from zero to 100 (inclusive) are valid. How often "
      "each thread repeats this can be determined by the number of rounds. "
      "The work cost determines how long each work session (not sleeping) "
      "takes. If it is zero a thread would effectively do nothing. By "
      "setting the number of rounds to zero the thread does no work at all "
      "and pure thread creation times can be measured." },
    { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
    { "workcost", 'c', "NUMBER", 0,
      "Factor in the cost of each round of working" },
    { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },

    { NULL, 0, NULL, 0,
      "There are a number of different methods how thread creation can be "
      "synchronized. Synchronization is necessary since the number of "
      "concurrently running threads is limited." },
    { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
      "Synchronize using a signal (default)" },
    { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },

    { NULL, 0, NULL, 0,
      "One parameter for each thread's execution is the size of the stack. "
      "If this parameter is not used the system's default stack size is "
      "used. If many threads are used the stack size should be chosen "
      "quite small." },
    { "stacksize", 'S', "BYTES", 0, "Size of each thread's stack" },
    { "guardsize", 'g', "BYTES", 0,
      "Size of stack guard area; must fit into the stack" },

    { NULL, 0, NULL, 0, "Signal options:" },
    { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
    { "to-process", OPT_TO_PROCESS, NULL, 0,
      "Send signal to process (default)" },

    { NULL, 0, NULL, 0, "Administrative options:" },
    { "progress", 'p', NULL, 0, "Show signs of progress" },
    { "timing", 'T', NULL, 0,
      "Measure time from startup to the last thread finishing" },
    { NULL, 0, NULL, 0, NULL }
  };
103
104/* Prototype for option handler. */
105static error_t parse_opt (int key, char *arg, struct argp_state *state);
106
107/* Data structure to communicate with argp functions. */
108static struct argp argp =
109{
110 options, parse_opt
111};
112
113
114static unsigned long int threads = DEFAULT_THREADS;
115static unsigned long int workload = 75;
116static unsigned long int workcost = 20;
117static unsigned long int rounds = 10;
118static long int starts = 5000;
119static unsigned long int stacksize;
120static long int guardsize = -1;
121static bool progress;
122static bool timing;
123static bool to_thread;
124static unsigned long int toplevel = 1;
125
126
/* Count of worker threads started and not yet finished in sync-signal
   mode.  Modified only while running_mutex is held.  */
static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
static long running;

/* Process ID and thread ID of the main thread; the workers use them as
   the target of the SIGUSR1 notification.  */
static pid_t pid;
static pthread_t tmain;

/* Clock used for --timing and the time sampled at startup.  */
static clockid_t cl;
static struct timespec start_time;


/* Checksum collected from all working rounds.  Updated only while
   sum_mutex is held.  */
static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
unsigned int sum;

/* How the creation of new threads is throttled.  Being zero-initialized,
   the variable starts out as sync_signal.  */
static enum
  {
    sync_signal,
    sync_join
  }
sync_method;


/* Cycle counter values and frequencies are kept in 64 bits.  */
typedef unsigned long long hp_timing_t;


/* Attribute object used for every pthread_create call in this file.  */
static pthread_attr_t attr;
154
155
156static void *
157work (void *arg)
158{
159 unsigned long int i;
160 unsigned int state = (unsigned long int) arg;
161
162 for (i = 0; i < rounds; ++i)
163 {
164 /* Determine what to do. */
165 unsigned int rnum;
166
167 /* Uniform distribution. */
168 do
169 rnum = rand_r (seed: &state);
170 while (rnum >= UINT_MAX - (UINT_MAX % 100));
171
172 rnum %= 100;
173
174 if (rnum < workload)
175 {
176 int j;
177 int a[4] = { i, rnum, i + rnum, rnum - i };
178
179 if (progress)
180 write (STDERR_FILENO, "c", 1);
181
182 for (j = 0; j < workcost; ++j)
183 {
184 a[0] += a[3] >> 12;
185 a[1] += a[2] >> 20;
186 a[2] += a[1] ^ 0x3423423;
187 a[3] += a[0] - a[1];
188 }
189
190 pthread_mutex_lock (mutex: &sum_mutex);
191 sum += a[0] + a[1] + a[2] + a[3];
192 pthread_mutex_unlock (mutex: &sum_mutex);
193 }
194 else
195 {
196 /* Just sleep. */
197 struct timespec tv;
198
199 tv.tv_sec = 0;
200 tv.tv_nsec = 10000000;
201
202 if (progress)
203 write (STDERR_FILENO, "w", 1);
204
205 nanosleep (requested_time: &tv, NULL);
206 }
207 }
208
209 return NULL;
210}
211
212
213static void *
214thread_function (void *arg)
215{
216 work (arg);
217
218 pthread_mutex_lock (mutex: &running_mutex);
219 if (--running <= 0 && starts <= 0)
220 {
221 /* We are done. */
222 if (progress)
223 write (STDERR_FILENO, "\n", 1);
224
225 if (timing)
226 {
227 struct timespec end_time;
228
229 if (clock_gettime (clock_id: cl, tp: &end_time) == 0)
230 {
231 end_time.tv_sec -= start_time.tv_sec;
232 end_time.tv_nsec -= start_time.tv_nsec;
233 if (end_time.tv_nsec < 0)
234 {
235 end_time.tv_nsec += 1000000000;
236 --end_time.tv_sec;
237 }
238
239 printf (format: "\nRuntime: %lu.%09lu seconds\n",
240 (unsigned long int) end_time.tv_sec,
241 (unsigned long int) end_time.tv_nsec);
242 }
243 }
244
245 printf (format: "Result: %08x\n", sum);
246
247 exit (0);
248 }
249 pthread_mutex_unlock (mutex: &running_mutex);
250
251 if (sync_method == sync_signal)
252 {
253 if (to_thread)
254 /* This code sends a signal to the main thread. */
255 pthread_kill (threadid: tmain, SIGUSR1);
256 else
257 /* Use this code to test sending a signal to the process. */
258 kill (pid: pid, SIGUSR1);
259 }
260
261 if (progress)
262 write (STDERR_FILENO, "f", 1);
263
264 return NULL;
265}
266
267
/* Work order handed to one toplevel thread running start_threads.  */
struct start_info
{
  /* Number of worker threads this toplevel thread has to create.  */
  unsigned int starts;
  /* Number of thread slots, i.e. workers it keeps alive at once.  */
  unsigned int threads;
};
273
274
275static void *
276start_threads (void *arg)
277{
278 struct start_info *si = arg;
279 unsigned int starts = si->starts;
280 pthread_t ths[si->threads];
281 unsigned int state = starts;
282 unsigned int n;
283 unsigned int i = 0;
284 int err;
285
286 if (progress)
287 write (STDERR_FILENO, "T", 1);
288
289 memset (ths, '\0', sizeof (pthread_t) * si->threads);
290
291 while (starts-- > 0)
292 {
293 if (ths[i] != 0)
294 {
295 /* Wait for the threads in the order they were created. */
296 err = pthread_join (th: ths[i], NULL);
297 if (err != 0)
298 error (EXIT_FAILURE, errnum: err, format: "cannot join thread");
299
300 if (progress)
301 write (STDERR_FILENO, "f", 1);
302 }
303
304 err = pthread_create (newthread: &ths[i], attr: &attr, start_routine: work,
305 arg: (void *) (long) (rand_r (seed: &state) + starts + i));
306
307 if (err != 0)
308 error (EXIT_FAILURE, errnum: err, format: "cannot start thread");
309
310 if (progress)
311 write (STDERR_FILENO, "t", 1);
312
313 if (++i == si->threads)
314 i = 0;
315 }
316
317 n = i;
318 do
319 {
320 if (ths[i] != 0)
321 {
322 err = pthread_join (th: ths[i], NULL);
323 if (err != 0)
324 error (EXIT_FAILURE, errnum: err, format: "cannot join thread");
325
326 if (progress)
327 write (STDERR_FILENO, "f", 1);
328 }
329
330 if (++i == si->threads)
331 i = 0;
332 }
333 while (i != n);
334
335 if (progress)
336 write (STDERR_FILENO, "F", 1);
337
338 return NULL;
339}
340
341
342int
343main (int argc, char *argv[])
344{
345 int remaining;
346 sigset_t ss;
347 pthread_t th;
348 pthread_t *ths = NULL;
349 int empty = 0;
350 int last;
351 bool cont = true;
352
353 /* Parse and process arguments. */
354 argp_parse (argp: &argp, argc: argc, argv: argv, flags: 0, arg_index: &remaining, NULL);
355
356 if (sync_method == sync_join)
357 {
358 ths = (pthread_t *) calloc (nmemb: threads, size: sizeof (pthread_t));
359 if (ths == NULL)
360 error (EXIT_FAILURE, errno,
361 format: "cannot allocate memory for thread descriptor array");
362
363 last = threads;
364 }
365 else
366 {
367 ths = &th;
368 last = 1;
369 }
370
371 if (toplevel > threads)
372 {
373 printf (format: "resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
374 threads);
375 toplevel = threads;
376 }
377
378 if (timing)
379 {
380 if (clock_getcpuclockid (pid: 0, clock_id: &cl) != 0
381 || clock_gettime (clock_id: cl, tp: &start_time) != 0)
382 timing = false;
383 }
384
385 /* We need this later. */
386 pid = getpid ();
387 tmain = pthread_self ();
388
389 /* We use signal SIGUSR1 for communication between the threads and
390 the main thread. We only want sychronous notification. */
391 if (sync_method == sync_signal)
392 {
393 sigemptyset (&ss);
394 sigaddset (&ss, SIGUSR1);
395 if (sigprocmask (SIG_BLOCK, set: &ss, NULL) != 0)
396 error (EXIT_FAILURE, errno, format: "cannot set signal mask");
397 }
398
399 /* Create the thread attributes. */
400 pthread_attr_init (attr: &attr);
401
402 /* If the user provided a stack size use it. */
403 if (stacksize != 0
404 && pthread_attr_setstacksize (attr: &attr, stacksize: stacksize) != 0)
405 puts (s: "could not set stack size; will use default");
406 /* And stack guard size. */
407 if (guardsize != -1
408 && pthread_attr_setguardsize (attr: &attr, guardsize: guardsize) != 0)
409 puts (s: "invalid stack guard size; will use default");
410
411 /* All threads are created detached if we are not using pthread_join
412 to synchronize. */
413 if (sync_method != sync_join)
414 pthread_attr_setdetachstate (attr: &attr, PTHREAD_CREATE_DETACHED);
415
416 if (sync_method == sync_signal)
417 {
418 while (1)
419 {
420 int err;
421 bool do_wait = false;
422
423 pthread_mutex_lock (mutex: &running_mutex);
424 if (starts-- < 0)
425 cont = false;
426 else
427 do_wait = ++running >= threads && starts > 0;
428
429 pthread_mutex_unlock (mutex: &running_mutex);
430
431 if (! cont)
432 break;
433
434 if (progress)
435 write (STDERR_FILENO, "t", 1);
436
437 err = pthread_create (newthread: &ths[empty], attr: &attr, start_routine: thread_function,
438 arg: (void *) starts);
439 if (err != 0)
440 error (EXIT_FAILURE, errnum: err, format: "cannot start thread %lu", starts);
441
442 if (++empty == last)
443 empty = 0;
444
445 if (do_wait)
446 sigwaitinfo (set: &ss, NULL);
447 }
448
449 /* Do nothing anymore. On of the threads will terminate the program. */
450 sigfillset (&ss);
451 sigdelset (&ss, SIGINT);
452 while (1)
453 sigsuspend (set: &ss);
454 }
455 else
456 {
457 pthread_t ths[toplevel];
458 struct start_info si[toplevel];
459 unsigned int i;
460
461 for (i = 0; i < toplevel; ++i)
462 {
463 unsigned int child_starts = starts / (toplevel - i);
464 unsigned int child_threads = threads / (toplevel - i);
465 int err;
466
467 si[i].starts = child_starts;
468 si[i].threads = child_threads;
469
470 err = pthread_create (newthread: &ths[i], attr: &attr, start_routine: start_threads, arg: &si[i]);
471 if (err != 0)
472 error (EXIT_FAILURE, errnum: err, format: "cannot start thread");
473
474 starts -= child_starts;
475 threads -= child_threads;
476 }
477
478 for (i = 0; i < toplevel; ++i)
479 {
480 int err = pthread_join (th: ths[i], NULL);
481
482 if (err != 0)
483 error (EXIT_FAILURE, errnum: err, format: "cannot join thread");
484 }
485
486 /* We are done. */
487 if (progress)
488 write (STDERR_FILENO, "\n", 1);
489
490 if (timing)
491 {
492 struct timespec end_time;
493
494 if (clock_gettime (clock_id: cl, tp: &end_time) == 0)
495 {
496 end_time.tv_sec -= start_time.tv_sec;
497 end_time.tv_nsec -= start_time.tv_nsec;
498 if (end_time.tv_nsec < 0)
499 {
500 end_time.tv_nsec += 1000000000;
501 --end_time.tv_sec;
502 }
503
504 printf (format: "\nRuntime: %lu.%09lu seconds\n",
505 (unsigned long int) end_time.tv_sec,
506 (unsigned long int) end_time.tv_nsec);
507 }
508 }
509
510 printf (format: "Result: %08x\n", sum);
511
512 exit (0);
513 }
514
515 /* NOTREACHED */
516 return 0;
517}
518
519
520/* Handle program arguments. */
521static error_t
522parse_opt (int key, char *arg, struct argp_state *state)
523{
524 unsigned long int num;
525 long int snum;
526
527 switch (key)
528 {
529 case 't':
530 num = strtoul (arg, NULL, 0);
531 if (num <= MAX_THREADS)
532 threads = num;
533 else
534 printf (format: "\
535number of threads limited to %u; recompile with a higher limit if necessary",
536 MAX_THREADS);
537 break;
538
539 case 'w':
540 num = strtoul (arg, NULL, 0);
541 if (num <= 100)
542 workload = num;
543 else
544 puts (s: "workload must be between 0 and 100 percent");
545 break;
546
547 case 'c':
548 workcost = strtoul (arg, NULL, 0);
549 break;
550
551 case 'r':
552 rounds = strtoul (arg, NULL, 0);
553 break;
554
555 case 's':
556 starts = strtoul (arg, NULL, 0);
557 break;
558
559 case 'S':
560 num = strtoul (arg, NULL, 0);
561 if (num >= PTHREAD_STACK_MIN)
562 stacksize = num;
563 else
564 printf (format: "minimum stack size is %d\n", PTHREAD_STACK_MIN);
565 break;
566
567 case 'g':
568 snum = strtol (arg, NULL, 0);
569 if (snum < 0)
570 printf (format: "invalid guard size %s\n", arg);
571 else
572 guardsize = snum;
573 break;
574
575 case 'p':
576 progress = true;
577 break;
578
579 case 'T':
580 timing = true;
581 break;
582
583 case OPT_TO_THREAD:
584 to_thread = true;
585 break;
586
587 case OPT_TO_PROCESS:
588 to_thread = false;
589 break;
590
591 case OPT_SYNC_SIGNAL:
592 sync_method = sync_signal;
593 break;
594
595 case OPT_SYNC_JOIN:
596 sync_method = sync_join;
597 break;
598
599 case OPT_TOPLEVEL:
600 num = strtoul (arg, NULL, 0);
601 if (num < MAX_THREADS)
602 toplevel = num;
603 else
604 printf (format: "\
605number of threads limited to %u; recompile with a higher limit if necessary",
606 MAX_THREADS);
607 sync_method = sync_join;
608 break;
609
610 default:
611 return ARGP_ERR_UNKNOWN;
612 }
613
614 return 0;
615}
616
617
618static hp_timing_t
619get_clockfreq (void)
620{
621 /* We read the information from the /proc filesystem. It contains at
622 least one line like
623 cpu MHz : 497.840237
624 or also
625 cpu MHz : 497.841
626 We search for this line and convert the number in an integer. */
627 static hp_timing_t result;
628 int fd;
629
630 /* If this function was called before, we know the result. */
631 if (result != 0)
632 return result;
633
634 fd = open (file: "/proc/cpuinfo", O_RDONLY);
635 if (__glibc_likely (fd != -1))
636 {
637 /* XXX AFAIK the /proc filesystem can generate "files" only up
638 to a size of 4096 bytes. */
639 char buf[4096];
640 ssize_t n;
641
642 n = read (fd, buf, sizeof buf);
643 if (__builtin_expect (n, 1) > 0)
644 {
645 char *mhz = memmem (buf, n, "cpu MHz", 7);
646
647 if (__glibc_likely (mhz != NULL))
648 {
649 char *endp = buf + n;
650 int seen_decpoint = 0;
651 int ndigits = 0;
652
653 /* Search for the beginning of the string. */
654 while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
655 ++mhz;
656
657 while (mhz < endp && *mhz != '\n')
658 {
659 if (*mhz >= '0' && *mhz <= '9')
660 {
661 result *= 10;
662 result += *mhz - '0';
663 if (seen_decpoint)
664 ++ndigits;
665 }
666 else if (*mhz == '.')
667 seen_decpoint = 1;
668
669 ++mhz;
670 }
671
672 /* Compensate for missing digits at the end. */
673 while (ndigits++ < 6)
674 result *= 10;
675 }
676 }
677
678 close (fd: fd);
679 }
680
681 return result;
682}
683
684
/* Store in *CLOCK_ID the ID of the CPU-time clock of process PID.  Only
   the calling process is supported, named either by zero or by its own
   process ID.  Returns 0 on success, EPERM for any other process, and
   ENOENT if CLOCK_PROCESS_CPUTIME_ID is not available at compile time.  */
int
clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
{
  /* Anything other than ourselves is refused.  */
  if (!(pid == 0 || pid == getpid ()))
    return EPERM;

#ifndef CLOCK_PROCESS_CPUTIME_ID
  /* No such clock to hand out.  */
  return ENOENT;
#else
  *clock_id = CLOCK_PROCESS_CPUTIME_ID;

  return 0;
#endif
}
702
703
/* HP_TIMING_NOW (Var) stores the current value of the CPU's counter
   register (the time stamp counter on x86, ar.itc on ia64) in the 64-bit
   lvalue Var.  The reserved spellings __i386__ and __asm__ are used so the
   macro also works in strict ISO C modes, where `i386' and `asm' are not
   predefined.  Other architectures are not supported.  */
#if defined __i386__
# define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
#elif defined __x86_64__
/* rdtsc delivers the value in two 32-bit halves in %eax and %edx.  */
# define HP_TIMING_NOW(Var) \
  ({ unsigned int _hi, _lo;                                             \
     __asm__ __volatile__ ("rdtsc" : "=a" (_lo), "=d" (_hi));           \
     (Var) = ((unsigned long long int) _hi << 32) | _lo; })
#elif defined __ia64__
# define HP_TIMING_NOW(Var) \
  __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
#else
# error "HP_TIMING_NOW missing"
#endif
716
717/* Get current value of CLOCK and store it in TP. */
718int
719clock_gettime (clockid_t clock_id, struct timespec *tp)
720{
721 int retval = -1;
722
723 switch (clock_id)
724 {
725 case CLOCK_PROCESS_CPUTIME_ID:
726 {
727
728 static hp_timing_t freq;
729 hp_timing_t tsc;
730
731 /* Get the current counter. */
732 HP_TIMING_NOW (tsc);
733
734 if (freq == 0)
735 {
736 freq = get_clockfreq ();
737 if (freq == 0)
738 return EINVAL;
739 }
740
741 /* Compute the seconds. */
742 tp->tv_sec = tsc / freq;
743
744 /* And the nanoseconds. This computation should be stable until
745 we get machines with about 16GHz frequency. */
746 tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;
747
748 retval = 0;
749 }
750 break;
751
752 default:
753 errno = EINVAL;
754 break;
755 }
756
757 return retval;
758}
759

/* Source: glibc/nptl/perf.c */