1// z_Linux_asm.S: - microtasking routines specifically
2// written for Intel platforms running Linux* OS
3
4//
5////===----------------------------------------------------------------------===//
6////
7//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8//// See https://llvm.org/LICENSE.txt for license information.
9//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10////
11////===----------------------------------------------------------------------===//
12//
13
14// -----------------------------------------------------------------------
15// macros
16// -----------------------------------------------------------------------
17
18#include "kmp_config.h"
19
20#if KMP_ARCH_X86 || KMP_ARCH_X86_64
21
// Control-flow Enforcement Technology (CET) support.
// When the object format is ELF, the compiler defines __CET__ and the
// header <cet.h> can be found, include it (presumably it is what provides
// _CET_ENDBR - TODO confirm).
# if defined(__ELF__) && defined(__CET__) && defined(__has_include)
#  if __has_include(<cet.h>)
#   include <cet.h>
#  endif
# endif

// Fallback: if nothing above defined _CET_ENDBR, let it expand to nothing so
// that the PROC macros can emit it unconditionally.
# ifndef _CET_ENDBR
#  define _CET_ENDBR
# endif
31
// pause_op: the spin-wait hint used by __kmp_x86_pause.
# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
// NOTE(review): the macro below is commented out, so nothing defines
// pause_op on this branch.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
// Emit the two opcode bytes f3 90 directly instead of a mnemonic.
#  define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC
55
// Platform helper macros for the x86 / x86-64 routines.
//
//   KMP_PREFIX_UNDERSCORE(x)  external spelling of symbol x
//   KMP_LABEL(x)              spelling of a label local to a routine
//   ALIGN n                   alignment request (argument is a power of two
//                             on the non-Darwin branch)
//   PROC name                 opens a global routine
//   DEBUG_INFO name           closes a routine
//   KMP_CFI_*                 thin wrappers around the .cfi_* directives
# if KMP_OS_DARWIN

// ---- OS X*: macros use positional arguments ($0) and .endmacro ----------

// extra underscore for OS X* symbols
#  define KMP_PREFIX_UNDERSCORE(x) _##x
// form the name of label
#  define KMP_LABEL(x) L_##x

// The CFI wrappers expand to nothing on this branch.
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro

// ALIGN: hands its argument straight to .align.
.macro ALIGN
	.align $0
.endmacro

// DEBUG_INFO: emits nothing on this branch.
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

// PROC: align, export the underscore-prefixed symbol, define it, and emit
// _CET_ENDBR as the first thing in the routine.
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
	_CET_ENDBR
.endmacro

# else // KMP_OS_DARWIN

// ---- everything else: named macro arguments and .endm -------------------

// no extra underscore for Linux* OS symbols
#  define KMP_PREFIX_UNDERSCORE(x) x
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
#  if KMP_MIC
// local label
#   define KMP_LABEL(x) L_##x
#  else
// local label hidden from backtraces
#   define KMP_LABEL(x) .L_##x
#  endif // KMP_MIC

// ALIGN size: passes 1<<size to .align.
.macro ALIGN size
	.align 1<<(\size)
.endm

// DEBUG_INFO proc: closes the CFI region opened by PROC, then records the
// symbol type and size for proc.
.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	.align 16
	.type  \proc,@function
	.size  \proc,.-\proc
.endm

// PROC proc: align (ALIGN 4), export and define the symbol, open a CFI
// region and emit _CET_ENDBR.
.macro PROC proc
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
	_CET_ENDBR
.endm

// One-to-one wrappers around the assembler CFI directives.
.macro KMP_CFI_DEF_OFFSET sz
	.cfi_def_cfa_offset	\sz
.endm
.macro KMP_CFI_OFFSET reg, sz
	.cfi_offset	\reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
	.cfi_def_cfa_register	\reg
.endm
.macro KMP_CFI_DEF reg, sz
	.cfi_def_cfa	\reg,\sz
.endm

# endif // KMP_OS_DARWIN
121#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
122
123#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
124
// Platform helper macros for the ARM / AArch64 routines.  Three variants are
// provided (Darwin, Windows, everything else); each defines
// KMP_PREFIX_UNDERSCORE, KMP_LABEL, ALIGN, DEBUG_INFO and PROC.
# if KMP_OS_DARWIN

// extra underscore for OS X* symbols
#  define KMP_PREFIX_UNDERSCORE(x) _##x
// form the name of label
#  define KMP_LABEL(x) L_##x

// ALIGN: hands its (positional) argument straight to .align.
.macro ALIGN
	.align $0
.endmacro

// DEBUG_INFO: emits nothing on this branch.
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

// PROC: align, export the underscore-prefixed symbol and define it.
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro

# elif KMP_OS_WINDOWS

// no extra underscore for Windows/ARM64 symbols
#  define KMP_PREFIX_UNDERSCORE(x) x
// Format labels so that they don't override function names in gdb's backtraces
// local label hidden from backtraces
#  define KMP_LABEL(x) .L_##x

// ALIGN size: passes 1<<size to .align.
.macro ALIGN size
	.align 1<<(\size)
.endm

// DEBUG_INFO proc: only re-aligns; no symbol metadata is emitted here.
.macro DEBUG_INFO proc
	ALIGN 2
.endm

// PROC proc: align, export and define the symbol (no CFI region).
.macro PROC proc
	ALIGN 2
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.endm

# else // KMP_OS_DARWIN || KMP_OS_WINDOWS

// no extra underscore for Linux* OS symbols
#  define KMP_PREFIX_UNDERSCORE(x) x
// Format labels so that they don't override function names in gdb's backtraces
// local label hidden from backtraces
#  define KMP_LABEL(x) .L_##x

// ALIGN size: passes 1<<size to .align.
.macro ALIGN size
	.align 1<<(\size)
.endm

// DEBUG_INFO proc: closes the CFI region opened by PROC and records the
// symbol type and size.  The type operand is spelled %function when
// KMP_ARCH_ARM is set and @function otherwise.
.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	ALIGN 2
#  if KMP_ARCH_ARM
	.type  \proc,%function
#  else
	.type  \proc,@function
#  endif
	.size  \proc,.-\proc
.endm

// PROC proc: align, export and define the symbol, then open a CFI region.
.macro PROC proc
	ALIGN 2
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm

# endif // KMP_OS_DARWIN
190
# if KMP_OS_LINUX
// BTI and PAC gnu property note
//
// GNU_PROPERTY(type, value) appends one record to .note.gnu.property:
//   .word 4                       size of the name that follows ("GNU" + NUL)
//   .word 16                      size of the four words after the name
//   .word NT_GNU_PROPERTY_TYPE_0  note type
//   .asciz "GNU"                  name
//   .word type, 4, value, 0       property type, a size of 4, the value,
//                                 and a zero word (presumably padding)
#  define NT_GNU_PROPERTY_TYPE_0 5
#  define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
#  define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
#  define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2

#  define GNU_PROPERTY(type, value)                                             \
  .pushsection .note.gnu.property, "a";                                        \
  .p2align 3;                                                                  \
  .word 4;                                                                     \
  .word 16;                                                                    \
  .word NT_GNU_PROPERTY_TYPE_0;                                                \
  .asciz "GNU";                                                                \
  .word type;                                                                  \
  .word 4;                                                                     \
  .word value;                                                                 \
  .word 0;                                                                     \
  .popsection
# endif
211
// Derive the BTI / PAC feature bits from the compiler predefines.
//   BTI_FLAG  is the BTI feature bit when __ARM_FEATURE_BTI_DEFAULT is defined
//   PAC_FLAG  is the PAC feature bit when (__ARM_FEATURE_PAC_DEFAULT & 3) != 0
# if defined(__ARM_FEATURE_BTI_DEFAULT)
#  define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
# else
#  define BTI_FLAG 0
# endif
# if __ARM_FEATURE_PAC_DEFAULT & 3
#  define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
# else
#  define PAC_FLAG 0
# endif

// PACBTI_C / PACBTI_RET are the instructions to place at routine entry and
// before the return; GNU_PROPERTY_BTI_PAC emits the matching property note.
// All three expand to nothing when neither feature is enabled.
// NOTE(review): hint #25, #29 and #34 are presumably the hint-space
// encodings of PACIASP, AUTIASP and BTI c - confirm against the Arm ARM.
# if (BTI_FLAG | PAC_FLAG) != 0
#  if PAC_FLAG != 0
#   define PACBTI_C hint #25
#   define PACBTI_RET hint #29
#  else
#   define PACBTI_C hint #34
#   define PACBTI_RET
#  endif
#  define GNU_PROPERTY_BTI_PAC                                                  \
    GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
# else
#  define PACBTI_C
#  define PACBTI_RET
#  define GNU_PROPERTY_BTI_PAC
# endif
238#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
239
// COMMON name, size, align_power
//
// Declares the common symbol <name> of <size> bytes with .comm.  The third
// operand handed to .comm depends on the target OS:
//   Darwin   - none
//   Windows  - align_power, unchanged
//   others   - 1<<align_power
.macro COMMON name, size, align_power
#if KMP_OS_DARWIN
	.comm \name, \size
#elif KMP_OS_WINDOWS
	.comm \name, \size, \align_power
#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
	.comm \name, \size, (1<<(\align_power))
#endif
.endm
249
250// -----------------------------------------------------------------------
251// data
252// -----------------------------------------------------------------------
253
#ifdef KMP_GOMP_COMPAT

// Support for unnamed common blocks.
//
// The symbol ".gomp_critical_user_" contains a ".", so it has to be declared
// here in assembly.  Every variant below reserves a 32-byte common block
// under that name and exports a pointer-sized data object,
// __kmp_unnamed_critical_addr, that is initialised with its address.

# if KMP_ARCH_X86
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
// NOTE(review): the non-Darwin ALIGN macro passes 1<<size to .align, so
// this asks for 1<<8 = 256 rather than 8 - confirm that is intended.
        ALIGN 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */
304
305
306#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
307
// -----------------------------------------------------------------------
// IA-32 architecture routines (Linux* OS)
// -----------------------------------------------------------------------

        .ident  "Intel Corporation"
        .data
        ALIGN   4

//------------------------------------------------------------------------
// __kmp_x86_pause (IA-32)
//
//   void __kmp_x86_pause( void );
//
// Issues the spin-wait hint selected by pause_op and returns.
//
//   in:   nothing
//   out:  nothing
        .text
        PROC  __kmp_x86_pause

        pause_op
        ret

        DEBUG_INFO __kmp_x86_pause
326
327# if !KMP_ASM_INTRINS
328
//------------------------------------------------------------------------
// __kmp_test_then_add32 (IA-32)
//
//   kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p and returns the value *p held beforehand.
//
//   in:   4(%esp) = p,  8(%esp) = d
//   out:  %eax    = previous *p
//   uses: %ecx

        PROC  __kmp_test_then_add32

        movl    8(%esp), %eax           // d
        movl    4(%esp), %ecx           // p
        lock
        xaddl   %eax, (%ecx)            // *p += d, %eax = old *p
        ret

        DEBUG_INFO __kmp_test_then_add32
342
//------------------------------------------------------------------------
// __kmp_xchg_fixed8 (IA-32)
//
//   kmp_int32 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p and returns the byte that was there before.
// Only %al is written; the rest of %eax is left as it was on entry.
//
//   in:   4(%esp) = p,  8(%esp) = d
//   out:  %al     = previous *p
//   uses: %ecx
        PROC  __kmp_xchg_fixed8

        movb    8(%esp), %al            // d
        movl    4(%esp), %ecx           // p
        lock
        xchgb   %al, (%ecx)             // swap, %al = old byte
        ret

        DEBUG_INFO __kmp_xchg_fixed8
364
365
//------------------------------------------------------------------------
// __kmp_xchg_fixed16 (IA-32)
//
//   kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p and returns the 16-bit value that was there
// before.  Only %ax is written.
//
//   in:   4(%esp) = p,  8(%esp) = d
//   out:  %ax     = previous *p
//   uses: %ecx
        PROC  __kmp_xchg_fixed16

        movw    8(%esp), %ax            // d
        movl    4(%esp), %ecx           // p
        lock
        xchgw   %ax, (%ecx)             // swap, %ax = old value
        ret

        DEBUG_INFO __kmp_xchg_fixed16
386
387
//------------------------------------------------------------------------
// __kmp_xchg_fixed32 (IA-32)
//
//   kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p and returns the value that was there before.
//
//   in:   4(%esp) = p,  8(%esp) = d
//   out:  %eax    = previous *p
//   uses: %ecx
        PROC  __kmp_xchg_fixed32

        movl    8(%esp), %eax           // d
        movl    4(%esp), %ecx           // p
        lock
        xchgl   %eax, (%ecx)            // swap, %eax = old value
        ret

        DEBUG_INFO __kmp_xchg_fixed32
409
410
//------------------------------------------------------------------------
// __kmp_compare_and_store8 (IA-32)
//
//   kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p,
//                                      kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte: when *p equals cv, sv is stored into
// *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   4(%esp) = p,  8(%esp) = cv,  12(%esp) = sv
//   out:  %eax    = 0 or 1
//   uses: %ecx, %dl
        PROC  __kmp_compare_and_store8

        movl    4(%esp), %ecx           // p
        movb    12(%esp), %dl           // sv
        movb    8(%esp), %al            // cv (the comparand lives in %al)
        lock
        cmpxchgb %dl, (%ecx)
        sete    %al                     // %al = 1 on match, else 0
        andl    $1, %eax                // clear every bit of %eax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store8
425
//------------------------------------------------------------------------
// __kmp_compare_and_store16 (IA-32)
//
//   kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p,
//                                        kmp_int16 cv, kmp_int16 sv );
//
// Atomic compare-and-swap on a 16-bit value: when *p equals cv, sv is
// stored into *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   4(%esp) = p,  8(%esp) = cv,  12(%esp) = sv
//   out:  %eax    = 0 or 1
//   uses: %ecx, %dx
        PROC  __kmp_compare_and_store16

        movl    4(%esp), %ecx           // p
        movw    12(%esp), %dx           // sv
        movw    8(%esp), %ax            // cv (the comparand lives in %ax)
        lock
        cmpxchgw %dx, (%ecx)
        sete    %al                     // %al = 1 on match, else 0
        andl    $1, %eax                // clear every bit of %eax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store16
440
//------------------------------------------------------------------------
// __kmp_compare_and_store32 (IA-32)
//
//   kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p,
//                                        kmp_int32 cv, kmp_int32 sv );
//
// Atomic compare-and-swap on a 32-bit value: when *p equals cv, sv is
// stored into *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   4(%esp) = p,  8(%esp) = cv,  12(%esp) = sv
//   out:  %eax    = 0 or 1
//   uses: %ecx, %edx
        PROC  __kmp_compare_and_store32

        movl    4(%esp), %ecx           // p
        movl    12(%esp), %edx          // sv
        movl    8(%esp), %eax           // cv (the comparand lives in %eax)
        lock
        cmpxchgl %edx, (%ecx)
        sete    %al                     // %al = 1 on match, else 0
        andl    $1, %eax                // clear every bit of %eax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store32
455
//------------------------------------------------------------------------
// __kmp_compare_and_store64 (IA-32)
//
//   kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p,
//                                        kmp_int64 cv, kmp_int64 sv );
//
// Atomic 64-bit compare-and-swap built on cmpxchg8b: when *p equals cv,
// sv is stored into *p.  The result is 1 when the store happened and 0
// otherwise.
//
//   in (after the frame is set up):
//         8(%ebp)        = p
//         12/16(%ebp)    = cv low / high word
//         20/24(%ebp)    = sv low / high word
//   out:  %eax = 0 or 1
//   uses: %ecx, %edx; %ebx and %edi are saved and restored
        PROC  __kmp_compare_and_store64

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        pushl   %edi

        movl    8(%ebp), %edi           // p
        movl    20(%ebp), %ebx          // sv, low word
        movl    24(%ebp), %ecx          // sv, high word
        movl    12(%ebp), %eax          // cv, low word
        movl    16(%ebp), %edx          // cv, high word
        lock
        cmpxchg8b (%edi)                // compares %edx:%eax, stores %ecx:%ebx
        sete    %al                     // %al = 1 on match, else 0
        andl    $1, %eax                // clear every bit of %eax above bit 0

        popl    %edi
        popl    %ebx
        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64
480
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret8 (IA-32)
//
//   kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p,
//                                          kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte that reports the value it found:
// when *p equals cv, sv is stored into *p.  %al is whatever cmpxchgb
// leaves there, i.e. the byte *p held when the instruction ran.
//
//   in:   4(%esp) = p,  8(%esp) = cv,  12(%esp) = sv
//   out:  %al
//   uses: %ecx, %dl
        PROC  __kmp_compare_and_store_ret8

        movl    4(%esp), %ecx           // p
        movb    12(%esp), %dl           // sv
        movb    8(%esp), %al            // cv
        lock
        cmpxchgb %dl, (%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8
493
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret16 (IA-32)
//
//   kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p,
//                                            kmp_int16 cv, kmp_int16 sv );
//
// Atomic compare-and-swap on a 16-bit value that reports the value it
// found: when *p equals cv, sv is stored into *p.  %ax is whatever
// cmpxchgw leaves there, i.e. the value *p held when the instruction ran.
//
//   in:   4(%esp) = p,  8(%esp) = cv,  12(%esp) = sv
//   out:  %ax
//   uses: %ecx, %dx
        PROC  __kmp_compare_and_store_ret16

        movl    4(%esp), %ecx           // p
        movw    12(%esp), %dx           // sv
        movw    8(%esp), %ax            // cv
        lock
        cmpxchgw %dx, (%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16
507
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret32 (IA-32)
//
//   kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p,
//                                            kmp_int32 cv, kmp_int32 sv );
//
// Atomic compare-and-swap on a 32-bit value that reports the value it
// found: when *p equals cv, sv is stored into *p.  %eax is whatever
// cmpxchgl leaves there, i.e. the value *p held when the instruction ran.
//
//   in:   4(%esp) = p,  8(%esp) = cv,  12(%esp) = sv
//   out:  %eax
//   uses: %ecx, %edx
        PROC  __kmp_compare_and_store_ret32

        movl    4(%esp), %ecx           // p
        movl    12(%esp), %edx          // sv
        movl    8(%esp), %eax           // cv
        lock
        cmpxchgl %edx, (%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32
521
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret64 (IA-32)
//
//   kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p,
//                                            kmp_int64 cv, kmp_int64 sv );
//
// Atomic 64-bit compare-and-swap built on cmpxchg8b that reports the value
// it found: when *p equals cv, sv is stored into *p.  %edx:%eax is
// whatever cmpxchg8b leaves there, i.e. the value *p held when the
// instruction ran.
//
//   in (after the frame is set up):
//         8(%ebp)        = p
//         12/16(%ebp)    = cv low / high word
//         20/24(%ebp)    = sv low / high word
//   out:  %edx:%eax
//   uses: %ecx; %ebx and %edi are saved and restored
        PROC  __kmp_compare_and_store_ret64

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        pushl   %edi

        movl    8(%ebp), %edi           // p
        movl    20(%ebp), %ebx          // sv, low word
        movl    24(%ebp), %ecx          // sv, high word
        movl    12(%ebp), %eax          // cv, low word
        movl    16(%ebp), %edx          // cv, high word
        lock
        cmpxchg8b (%edi)                // compares %edx:%eax, stores %ecx:%ebx

        popl    %edi
        popl    %ebx
        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64
545
546
//------------------------------------------------------------------------
// __kmp_xchg_real32 (IA-32)
//
//   kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *addr,
//                                 kmp_real32 data );
//
// Atomically stores data into *addr and returns the value that was there
// before.  The exchange is done on the raw 32-bit pattern with xchgl; the
// old pattern is then spilled to a stack slot and loaded onto the x87
// stack so that exactly one value, the result, is left in st(0).
//
//   in (at entry):              4(%esp) = addr,  8(%esp) = data
//   in (after frame is set up): 8(%ebp) = addr, 12(%ebp) = data
//                               (4(%ebp) is the return address)
//   out:  st(0) = previous *addr
//   uses: %eax, %ecx
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp                // scratch slot at -4(%ebp)

        movl    8(%ebp), %ecx           // addr
        movl    12(%ebp), %eax          // bit pattern of data

        lock
        xchgl   %eax, (%ecx)            // %eax = bit pattern previously in *addr

        movl    %eax, -4(%ebp)          // spill the old pattern ...
        flds    -4(%ebp)                // ... and return it in st(0)

        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32
585
586# endif /* !KMP_ASM_INTRINS */
587
//------------------------------------------------------------------------
// __kmp_invoke_microtask (IA-32)
//
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// Stack frame (relative to %ebp once the frame is set up):
//    8(%ebp)  pkfn            24(%ebp)  p_argv
//   12(%ebp)  gtid            28(%ebp)  exit_frame_ptr (OMPT_SUPPORT only)
//   16(%ebp)  tid             -4(%ebp)  local copy of p_argv
//   20(%ebp)  argc            -8(%ebp)  reserved, not referenced below
//                            -12(%ebp)  saved %ebx
//
// %esp is lowered before the pushes so that, once the argc+2 outgoing
// words are on the stack, it sits on a 128-byte boundary at the call.
//
// return: %eax (always 1)

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        PROC  __kmp_invoke_microtask

        pushl   %ebp
        KMP_CFI_DEF_OFFSET 8
        KMP_CFI_OFFSET ebp,-8
        movl    %esp, %ebp              // frame pointer for this routine
        KMP_CFI_REGISTER ebp
        subl    $8, %esp                // two local slots: -4(%ebp), -8(%ebp)
        pushl   %ebx                    // callee-saved, lands at -12(%ebp)

#if OMPT_SUPPORT
        movl    28(%ebp), %ebx          // exit_frame_ptr
        movl    %ebp, (%ebx)            // *exit_frame_ptr = this frame
#endif

        // ---- stack alignment -------------------------------------------
        movl    20(%ebp), %ebx          // argc
        addl    $2, %ebx                // plus &gtid and &tid
        shll    $2, %ebx                // bytes of outgoing arguments
        movl    %esp, %eax
        subl    %ebx, %eax              // where %esp would end up unaligned
        movl    %eax, %ebx
        andl    $0xFFFFFF80, %eax       // same address rounded down to 128
        subl    %eax, %ebx              // distance between the two
        subl    %ebx, %esp              // pre-pad %esp by that distance

        movl    24(%ebp), %eax          // p_argv
        movl    %eax, -4(%ebp)          // keep a local copy

        movl    20(%ebp), %ebx          // argc
        shll    $2, %ebx                // byte offset just past the last arg

        // ---- push p_argv[argc-1] down to p_argv[0] ----------------------
KMP_LABEL(invoke_ia32_check):
        cmpl    $0, %ebx
        jg      KMP_LABEL(invoke_ia32_push)
        jmp     KMP_LABEL(invoke_ia32_call)
        ALIGN   2
KMP_LABEL(invoke_ia32_push):
        movl    -4(%ebp), %eax          // p_argv
        subl    $4, %ebx                // step back one entry
        addl    %ebx, %eax              // address of that entry
        movl    (%eax), %edx
        pushl   %edx

        jmp     KMP_LABEL(invoke_ia32_check)
        ALIGN   2
KMP_LABEL(invoke_ia32_call):
        leal    16(%ebp), %eax          // &tid
        pushl   %eax

        leal    12(%ebp), %eax          // &gtid
        pushl   %eax

        movl    8(%ebp), %ebx           // pkfn
        call    *%ebx                   // (*pkfn)( &gtid, &tid, ... )

        movl    $1, %eax                // return 1

        movl    -12(%ebp), %ebx         // restore %ebx via %ebp (%esp moved)
        leave
        KMP_CFI_DEF esp,4
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask
677
678
//------------------------------------------------------------------------
// __kmp_hardware_timestamp (IA-32)
//
//   kmp_uint64 __kmp_hardware_timestamp( void );
//
// Reads the time-stamp counter with rdtsc and returns it as is.
//
//   in:   nothing
//   out:  %edx:%eax = counter value as delivered by rdtsc
        PROC  __kmp_hardware_timestamp

        rdtsc
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp
687
688#endif /* KMP_ARCH_X86 */
689
690
691#if KMP_ARCH_X86_64
692
693// -----------------------------------------------------------------------
694// microtasking routines specifically written for IA-32 architecture and
695// Intel(R) 64 running Linux* OS
696// -----------------------------------------------------------------------
697
// Section preamble for the Intel(R) 64 routines: an identification string,
// then a switch to .data followed by an ALIGN 4 request.
// -- Machine type P
// mark_description "Intel Corporation";
        .ident  "Intel Corporation"
// -- .file "z_Linux_asm.S"
        .data
        ALIGN   4
704
705// To prevent getting our code into .data section .text added to every routine
706// definition for x86_64.
707//------------------------------------------------------------------------
708# if !KMP_ASM_INTRINS
709
//------------------------------------------------------------------------
// __kmp_test_then_add32 (Intel(R) 64)
//
//   kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p and returns the value *p held beforehand.
//
//   in:   %rdi = p,  %esi = d
//   out:  %eax = previous *p
        .text
        PROC  __kmp_test_then_add32

        movl    %esi, %eax              // d
        lock
        xaddl   %eax, (%rdi)            // *p += d, %eax = old *p
        ret

        DEBUG_INFO __kmp_test_then_add32
730
731
//------------------------------------------------------------------------
// __kmp_test_then_add64 (Intel(R) 64)
//
//   kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically adds d to *p and returns the value *p held beforehand.
//
//   in:   %rdi = p,  %rsi = d
//   out:  %rax = previous *p
        .text
        PROC  __kmp_test_then_add64

        movq    %rsi, %rax              // d
        lock
        xaddq   %rax, (%rdi)            // *p += d, %rax = old *p
        ret

        DEBUG_INFO __kmp_test_then_add64
751
752
//------------------------------------------------------------------------
// __kmp_xchg_fixed8 (Intel(R) 64)
//
//   kmp_int32 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p and returns the byte that was there before.
// Only %al is written; the rest of %rax is left as it was on entry.
//
//   in:   %rdi = p,  %sil = d
//   out:  %al  = previous *p
        .text
        PROC  __kmp_xchg_fixed8

        movb    %sil, %al               // d
        lock
        xchgb   %al, (%rdi)             // swap, %al = old byte
        ret

        DEBUG_INFO __kmp_xchg_fixed8
774
775
//------------------------------------------------------------------------
// __kmp_xchg_fixed16 (Intel(R) 64)
//
//   kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p and returns the 16-bit value that was there
// before.  Only %ax is written.
//
//   in:   %rdi = p,  %si = d
//   out:  %ax  = previous *p
        .text
        PROC  __kmp_xchg_fixed16

        movw    %si, %ax                // d
        lock
        xchgw   %ax, (%rdi)             // swap, %ax = old value
        ret

        DEBUG_INFO __kmp_xchg_fixed16
796
797
//------------------------------------------------------------------------
// __kmp_xchg_fixed32 (Intel(R) 64)
//
//   kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p and returns the value that was there before.
//
//   in:   %rdi = p,  %esi = d
//   out:  %eax = previous *p
        .text
        PROC  __kmp_xchg_fixed32

        movl    %esi, %eax              // d
        lock
        xchgl   %eax, (%rdi)            // swap, %eax = old value
        ret

        DEBUG_INFO __kmp_xchg_fixed32
819
820
//------------------------------------------------------------------------
// __kmp_xchg_fixed64 (Intel(R) 64)
//
//   kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically stores d into *p and returns the value that was there before.
//
//   in:   %rdi = p,  %rsi = d
//   out:  %rax = previous *p
        .text
        PROC  __kmp_xchg_fixed64

        movq    %rsi, %rax              // d
        lock
        xchgq   %rax, (%rdi)            // swap, %rax = old value
        ret

        DEBUG_INFO __kmp_xchg_fixed64
841
842
//------------------------------------------------------------------------
// __kmp_compare_and_store8 (Intel(R) 64)
//
//   kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p,
//                                      kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte: when *p equals cv, sv is stored into
// *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   %rdi = p,  %sil = cv,  %dl = sv
//   out:  %rax = 0 or 1
        .text
        PROC  __kmp_compare_and_store8

        movb    %sil, %al               // cv (the comparand lives in %al)
        lock
        cmpxchgb %dl, (%rdi)
        sete    %al                     // %al = 1 on match, else 0
        andq    $1, %rax                // clear every bit of %rax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store8
866
867
//------------------------------------------------------------------------
// __kmp_compare_and_store16 (Intel(R) 64)
//
//   kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p,
//                                        kmp_int16 cv, kmp_int16 sv );
//
// Atomic compare-and-swap on a 16-bit value: when *p equals cv, sv is
// stored into *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   %rdi = p,  %si = cv,  %dx = sv
//   out:  %rax = 0 or 1
        .text
        PROC  __kmp_compare_and_store16

        movw    %si, %ax                // cv (the comparand lives in %ax)
        lock
        cmpxchgw %dx, (%rdi)
        sete    %al                     // %al = 1 on match, else 0
        andq    $1, %rax                // clear every bit of %rax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store16
891
892
//------------------------------------------------------------------------
// __kmp_compare_and_store32 (Intel(R) 64)
//
//   kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p,
//                                        kmp_int32 cv, kmp_int32 sv );
//
// Atomic compare-and-swap on a 32-bit value: when *p equals cv, sv is
// stored into *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   %rdi = p,  %esi = cv,  %edx = sv
//   out:  %rax = 0 or 1
        .text
        PROC  __kmp_compare_and_store32

        movl    %esi, %eax              // cv (the comparand lives in %eax)
        lock
        cmpxchgl %edx, (%rdi)
        sete    %al                     // %al = 1 on match, else 0
        andq    $1, %rax                // clear every bit of %rax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store32
916
917
//------------------------------------------------------------------------
// __kmp_compare_and_store64 (Intel(R) 64)
//
//   kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p,
//                                        kmp_int64 cv, kmp_int64 sv );
//
// Atomic compare-and-swap on a 64-bit value: when *p equals cv, sv is
// stored into *p.  The result is 1 when the store happened and 0 otherwise.
//
//   in:   %rdi = p,  %rsi = cv,  %rdx = sv
//   out:  %rax = 0 or 1
        .text
        PROC  __kmp_compare_and_store64

        movq    %rsi, %rax              // cv (the comparand lives in %rax)
        lock
        cmpxchgq %rdx, (%rdi)
        sete    %al                     // %al = 1 on match, else 0
        andq    $1, %rax                // clear every bit of %rax above bit 0
        ret

        DEBUG_INFO __kmp_compare_and_store64
940
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret8 (Intel(R) 64)
//
//   kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p,
//                                          kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte that reports the value it found:
// when *p equals cv, sv is stored into *p.  %al is whatever cmpxchgb
// leaves there, i.e. the byte *p held when the instruction ran.
//
//   in:   %rdi = p,  %sil = cv,  %dl = sv
//   out:  %al
        .text
        PROC  __kmp_compare_and_store_ret8

        movb    %sil, %al               // cv
        lock
        cmpxchgb %dl, (%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8
962
963
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret16 (Intel(R) 64)
//
//   kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p,
//                                            kmp_int16 cv, kmp_int16 sv );
//
// Atomic compare-and-swap on a 16-bit value that reports the value it
// found: when *p equals cv, sv is stored into *p.  %ax is whatever
// cmpxchgw leaves there, i.e. the value *p held when the instruction ran.
//
//   in:   %rdi = p,  %si = cv,  %dx = sv
//   out:  %ax
        .text
        PROC  __kmp_compare_and_store_ret16

        movw    %si, %ax                // cv
        lock
        cmpxchgw %dx, (%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16
985
986
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret32 (Intel(R) 64)
//
//   kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p,
//                                            kmp_int32 cv, kmp_int32 sv );
//
// Atomic compare-and-swap on a 32-bit value that reports the value it
// found: when *p equals cv, sv is stored into *p.  %eax is whatever
// cmpxchgl leaves there, i.e. the value *p held when the instruction ran.
//
//   in:   %rdi = p,  %esi = cv,  %edx = sv
//   out:  %eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl    %esi, %eax              // cv
        lock
        cmpxchgl %edx, (%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32
1008
1009
//------------------------------------------------------------------------
// __kmp_compare_and_store_ret64 (Intel(R) 64)
//
//   kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p,
//                                            kmp_int64 cv, kmp_int64 sv );
//
// Atomic compare-and-swap on a 64-bit value that reports the value it
// found: when *p equals cv, sv is stored into *p.  %rax is whatever
// cmpxchgq leaves there, i.e. the value *p held when the instruction ran.
//
//   in:   %rdi = p,  %rsi = cv,  %rdx = sv
//   out:  %rax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq    %rsi, %rax              // cv
        lock
        cmpxchgq %rdx, (%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64
1030
1031# endif /* !KMP_ASM_INTRINS */
1032
1033
1034# if !KMP_MIC
1035
1036# if !KMP_ASM_INTRINS
1037
//------------------------------------------------------------------------
// __kmp_xchg_real32 (Intel(R) 64)
//
//   kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *addr,
//                                 kmp_real32 data );
//
// Atomically stores data into *addr and returns the value that was there
// before.  The exchange is done on the raw 32-bit pattern, which is moved
// through %eax.
//
//   in:   %rdi  = addr,  %xmm0 (lower 4 bytes) = data
//   out:  %xmm0 (lower 4 bytes) = previous *addr
//   uses: %eax
        .text
        PROC  __kmp_xchg_real32

        movd    %xmm0, %eax             // bit pattern of data

        lock
        xchgl   %eax, (%rdi)            // swap, %eax = old pattern

        movd    %eax, %xmm0             // hand the old pattern back in %xmm0
        ret

        DEBUG_INFO __kmp_xchg_real32
1062
1063
//------------------------------------------------------------------------
// __kmp_xchg_real64 (Intel(R) 64)
//
//   kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *addr,
//                                 kmp_real64 data );
//
// Atomically stores data into *addr and returns the value that was there
// before.  The exchange is done on the raw 64-bit pattern, which is moved
// through %rax.
//
//   in:   %rdi  = addr,  %xmm0 (lower 8 bytes) = data
//   out:  %xmm0 (lower 8 bytes) = previous *addr
//   uses: %rax
        .text
        PROC  __kmp_xchg_real64

        movd    %xmm0, %rax             // bit pattern of data

        lock
        xchgq   %rax, (%rdi)            // swap, %rax = old pattern

        movd    %rax, %xmm0             // hand the old pattern back in %xmm0
        ret

        DEBUG_INFO __kmp_xchg_real64
1086
1087
# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */
1091
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
//      %rdi:   pkfn
//      %esi:   gtid
//      %edx:   tid
//      %ecx:   argc (a 32-bit int; it is sign-extended into %rcx before any
//              64-bit arithmetic because bits 63:32 are not guaranteed)
//      %r8:    p_argv
//      %r9:    &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//      %rax:   used all over the place
//      %rdx:   used in stack pointer alignment calculation
//      %r11:   used to traverse p_argv array
//      %rsi:   used as temporary for stack parameters
//              used as temporary for number of pkfn parms to push
//      %rbx:   used to hold pkfn address, and zero constant, callee-save
//
// frame layout below %rbp:
//      -8(%rbp)   saved %rbx
//      -16(%rbp)  gtid  (__gtid)
//      -24(%rbp)  tid   (__tid)
//
// return:      %eax    (always 1/TRUE)
__gtid = -16
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC  __kmp_invoke_microtask

        pushq   %rbp            // save base pointer
        KMP_CFI_DEF_OFFSET 16
        KMP_CFI_OFFSET rbp,-16
        movq    %rsp,%rbp       // establish the base pointer for this routine.
        KMP_CFI_REGISTER rbp

#if OMPT_SUPPORT
        movq    %rbp, (%r9)     // save exit_frame
#endif

        pushq   %rbx            // %rbx is callee-saved register
        pushq   %rsi            // Put gtid on stack so can pass &gtid to pkfn
        pushq   %rdx            // Put tid on stack so can pass &tid to pkfn

        movslq  %ecx, %rcx      // argc is an int: make all 64 bits well defined
        movq    %rcx, %rax      // Stack alignment calculation begins; argc -> %rax
        movq    $0, %rbx        // constant for cmovs later
        subq    $4, %rax        // subtract four args passed in registers to pkfn
#if KMP_MIC
        js      KMP_LABEL(kmp_0)        // jump to movq
        jmp     KMP_LABEL(kmp_0_exit)   // jump ahead
KMP_LABEL(kmp_0):
        movq    %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
        cmovsq  %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC

        movq    %rax, %rsi      // save max(0, argc-4) -> %rsi for later
        shlq    $3, %rax        // Number of bytes used on stack: max(0, argc-4)*8

        movq    %rsp, %rdx      //
        subq    %rax, %rdx      // %rsp-(max(0,argc-4)*8) -> %rdx --
                                // without align, stack ptr would be this
        movq    %rdx, %rax      // Save to %rax

        andq    $0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align)
        subq    %rax, %rdx      // Amount to subtract from %rsp
        subq    %rdx, %rsp      // Prepare the stack ptr --
                                // now %rsp will align to 128-byte boundary at call site

                                // setup pkfn parameter reg and stack
        movq    %rcx, %rax      // argc -> %rax
        cmpq    $0, %rsi
        je      KMP_LABEL(kmp_invoke_pass_parms)  // jump ahead if no parms to push
        shlq    $3, %rcx        // argc*8 -> %rcx
        movq    %r8, %rdx       // p_argv -> %rdx
        addq    %rcx, %rdx      // &p_argv[argc] -> %rdx

        movq    %rsi, %rcx      // max (0, argc-4) -> %rcx

KMP_LABEL(kmp_invoke_push_parms):
        // push nth - 7th parms to pkfn on stack
        subq    $8, %rdx        // decrement p_argv pointer to previous parm
        movq    (%rdx), %rsi    // p_argv[%rcx-1] -> %rsi
        pushq   %rsi            // push p_argv[%rcx-1] onto stack (reverse order)
        subl    $1, %ecx

// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
//              if the name of the label that is an operand of this jecxz starts with a dot (".");
//         Apple's linker does not support 1-byte length relocation;
//         Resolution: replace all .labelX entries with L_labelX.

        jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left
        jmp     KMP_LABEL(kmp_invoke_push_parms)
        ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms):       // put 1st - 6th parms to pkfn in registers.
                                // order here is important to avoid trashing
                                // registers used for both input and output parms!
        movq    %rdi, %rbx      // pkfn -> %rbx
        leaq    __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
        leaq    __tid(%rbp), %rsi  // &tid -> %rsi (store 2nd parm to pkfn)
        // Check if argc is 0
        cmpq    $0, %rax
        je      KMP_LABEL(kmp_no_args)  // Jump ahead

        movq    %r8, %r11       // p_argv -> %r11

#if KMP_MIC
        cmpq    $4, %rax        // argc >= 4?
        jns     KMP_LABEL(kmp_4)        // jump to movq
        jmp     KMP_LABEL(kmp_4_exit)   // jump ahead
KMP_LABEL(kmp_4):
        movq    24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)
KMP_LABEL(kmp_4_exit):

        cmpq    $3, %rax        // argc >= 3?
        jns     KMP_LABEL(kmp_3)        // jump to movq
        jmp     KMP_LABEL(kmp_3_exit)   // jump ahead
KMP_LABEL(kmp_3):
        movq    16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)
KMP_LABEL(kmp_3_exit):

        cmpq    $2, %rax        // argc >= 2?
        jns     KMP_LABEL(kmp_2)        // jump to movq
        jmp     KMP_LABEL(kmp_2_exit)   // jump ahead
KMP_LABEL(kmp_2):
        movq    8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)
KMP_LABEL(kmp_2_exit):

        cmpq    $1, %rax        // argc >= 1?
        jns     KMP_LABEL(kmp_1)        // jump to movq
        jmp     KMP_LABEL(kmp_1_exit)   // jump ahead
KMP_LABEL(kmp_1):
        movq    (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
KMP_LABEL(kmp_1_exit):
#else
        // NOTE(review): a cmov with a memory source presumably still reads
        // memory when its condition is false, so p_argv slots past argc may
        // be touched here - confirm.
        cmpq    $4, %rax        // argc >= 4?
        cmovnsq 24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)

        cmpq    $3, %rax        // argc >= 3?
        cmovnsq 16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)

        cmpq    $2, %rax        // argc >= 2?
        cmovnsq 8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)

        cmpq    $1, %rax        // argc >= 1?
        cmovnsq (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC

KMP_LABEL(kmp_no_args):
        call    *%rbx           // call (*pkfn)();
        movq    $1, %rax        // move 1 into return register;

        movq    -8(%rbp), %rbx  // restore %rbx using %rbp since %rsp was modified
        movq    %rbp, %rsp      // restore stack pointer
        popq    %rbp            // restore frame pointer
        KMP_CFI_DEF rsp,8
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask
1272
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the processor time-stamp counter with RDTSC and returns it as a
// single 64-bit value. RDTSC delivers the counter split in two halves
// (high half in %edx, low half in %eax); the halves are merged in %rax.
//
// return:   %rax = (%rdx << 32) | %rax
// clobbers: %rdx, flags
	.text
	PROC  __kmp_hardware_timestamp
	rdtsc				// %edx:%eax = time-stamp counter
	shlq	$32, %rdx		// move the high half to bits 63..32
	orq	%rdx, %rax		// combine high and low halves
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp
1284
//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
//
// Returns the bit index of the most significant set bit of the argument,
// as computed by the BSR instruction.
//
// parameters:
//      %edi:   value to scan
//
// return: %eax (index of the highest set bit)
//
// NOTE: BSR leaves its destination undefined when the source is zero and
// no check is made here, so the result for an argument of 0 is unspecified.
	.text
	PROC  __kmp_bsr32

	bsr	%edi,%eax		// %eax = index of highest set bit in %edi
	ret

	DEBUG_INFO __kmp_bsr32
1297
1298// -----------------------------------------------------------------------
1299#endif /* KMP_ARCH_X86_64 */
1300
1301// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//	x0:	pkfn
//	w1:	gtid
//	w2:	tid
//	w3:	argc
//	x4:	p_argv
//	x5:	&exit_frame
//
// locals:
//	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn
//	__tid:	tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//	 x8:	used to hold pkfn address
//	 w9:	used as temporary for number of pkfn parms
//	x10:	used to traverse p_argv array
//	x11:	used as temporary for stack placement calculation
//	x12:	used as temporary for stack parameters
//	x19:	used to preserve exit_frame_ptr, callee-save
//
// return:	w0	(always 1/TRUE)
//
// Frame layout: x29 is set to sp after the register pairs are pushed; gtid
// and tid live at x29-4 and x29-8, at the top of the dynamically sized area
// allocated below x29. Outgoing stack arguments for pkfn are written upward
// starting at the new sp.
//

// Offsets of the locals below x29 (used as [x29, #-offset]).
__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	PROC __kmp_invoke_microtask
	PACBTI_C

	stp	x29, x30, [sp, #-16]!	// save frame pointer and link register
# if OMPT_SUPPORT
	stp	x19, x20, [sp, #-16]!	// x19 will carry exit_frame_ptr across the call
# endif
	mov	x29, sp

	// Allocate 16 * (1 + argc/2) bytes: room for gtid/tid plus the
	// outgoing arguments, in multiples of 16 so sp stays 16-byte aligned.
	orr	w9, wzr, #1		// w9 = 1
	add	w9, w9, w3, lsr #1	// w9 = 1 + argc/2
	sub	sp, sp, w9, uxtw #4	// sp -= 16 * w9
	mov	x11, sp			// x11 = next outgoing stack-argument slot

	mov	x8, x0			// x8 = pkfn (x0 is reused for &gtid)
	str	w1, [x29, #-__gtid]	// spill gtid so its address can be passed
	str	w2, [x29, #-__tid]	// spill tid so its address can be passed
	mov	w9, w3			// w9 = argc (remaining-argument counter)
	mov	x10, x4			// x10 = p_argv cursor
# if OMPT_SUPPORT
	mov	x19, x5			// keep exit_frame_ptr in a callee-saved register
	str	x29, [x19]		// *exit_frame_ptr = frame address
# endif

	sub	x0, x29, #__gtid	// 1st parm to pkfn: &gtid
	sub	x1, x29, #__tid		// 2nd parm to pkfn: &tid

	// Load p_argv[0..5] into x2..x7, stopping as soon as argc is exhausted.
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x2, [x10]

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x3, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x4, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x5, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x6, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x7, [x10, #8]!

	// Copy the remaining p_argv entries to the outgoing stack area.
KMP_LABEL(kmp_0):
	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x12, [x10, #8]!		// pre-increment: fetch next p_argv entry
	str	x12, [x11], #8		// post-increment: store to next stack slot
	b	KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
	blr	x8			// call (*pkfn)(...)
	orr	w0, wzr, #1		// return value is always 1
	mov	sp, x29			// release the dynamically sized area
# if OMPT_SUPPORT
	str	xzr, [x19]		// *exit_frame_ptr = 0
	ldp	x19, x20, [sp], #16
# endif
	ldp	x29, x30, [sp], #16
	PACBTI_RET
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */
1429
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//	r0:	pkfn
//	r1:	gtid
//	r2:	tid
//	r3:	argc
//	r4(stack):	p_argv
//	r5(stack):	&exit_frame
//
// locals:
//	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn
//	__tid:	tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//	 r4:	used to hold pkfn address
//	 r5:	used as temporary for number of pkfn parms
//	 r6:	used to traverse p_argv array
//	 r7:	frame pointer (in some configurations)
//	 r8:	used as temporary for stack placement calculation
//		and as pointer to base of callee saved area
//	r10:	used to preserve exit_frame_ptr, callee-save
//	r11:	frame pointer (in some configurations)
//	r12:	used as temporary for stack parameters
//
// return:	r0	(always 1/TRUE)
//

// Offsets of the locals below the callee-saved area (used as [r8, #-offset]).
__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	PROC __kmp_invoke_microtask

	// Pushing one extra register (r3) to keep the stack aligned
	// for when we call pkfn below
	push	{r3-r11,lr}
	// Load p_argv and &exit_frame
	// (ten registers were pushed, so the first stack argument is at sp+10*4)
	ldr	r4, [sp, #10*4]
# if OMPT_SUPPORT
	ldr	r5, [sp, #11*4]
# endif

	// FP is the frame-pointer register for this configuration and FPOFF is
	// the offset of its save slot inside the block pushed above
	// (r7 is the 5th register pushed, r11 the 9th).
# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
# define FP r7
# define FPOFF 4*4
#else
# define FP r11
# define FPOFF 8*4
#endif
	add	FP, sp, #FPOFF
# if OMPT_SUPPORT
	mov	r10, r5			// keep exit_frame_ptr in a callee-saved register
	str	FP, [r10]		// *exit_frame_ptr = frame address
# endif
	mov	r8, sp			// r8 = base of the callee-saved area

	// Calculate how much stack to allocate, in increments of 8 bytes.
	// We strictly need 4*(argc-2) bytes (2 arguments are passed in
	// registers) but allocate 4*argc for simplicity (to avoid needing
	// to handle the argc<2 cases). We align the number of bytes
	// allocated to 8 bytes, to keep the stack aligned. (Since we
	// already allocate more than enough, it's ok to round down
	// instead of up for the alignment.) We allocate another extra
	// 8 bytes for gtid and tid.
	mov	r5, #1
	add	r5, r5, r3, lsr #1	// r5 = 1 + argc/2
	sub	sp, sp, r5, lsl #3	// sp -= 8 * r5

	str	r1, [r8, #-__gtid]	// spill gtid so its address can be passed
	str	r2, [r8, #-__tid]	// spill tid so its address can be passed
	mov	r5, r3			// r5 = argc (remaining-argument counter)
	mov	r6, r4			// r6 = p_argv cursor
	mov	r4, r0			// r4 = pkfn (r0 is reused for &gtid)

	// Prepare the first 2 parameters to pkfn - pointers to gtid and tid
	// in our stack frame.
	sub	r0, r8, #__gtid
	sub	r1, r8, #__tid

	mov	r8, sp			// r8 = next outgoing stack-argument slot

	// Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2
	cmp	r5, #0
	beq	KMP_LABEL(kmp_1)
	ldr	r2, [r6]

	subs	r5, r5, #1
	beq	KMP_LABEL(kmp_1)
	ldr	r3, [r6, #4]!

	// Loop, loading the rest of p_argv and writing the elements on the
	// stack.
KMP_LABEL(kmp_0):
	subs	r5, r5, #1
	beq	KMP_LABEL(kmp_1)
	ldr	r12, [r6, #4]!		// pre-increment: fetch next p_argv entry
	str	r12, [r8], #4		// post-increment: store to next stack slot
	b	KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
	blx	r4			// call (*pkfn)(...)
	mov	r0, #1			// return value is always 1

	// Recover the stack pointer from the frame pointer, which pkfn must
	// have preserved: sp = base of the callee-saved area.
	sub	r4, FP, #FPOFF
	mov	sp, r4
# undef FP
# undef FPOFF

# if OMPT_SUPPORT
	mov	r1, #0
	str	r1, [r10]		// *exit_frame_ptr = 0
# endif
	pop	{r3-r11,pc}

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */
1574
#if KMP_ARCH_PPC64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//	r3:	pkfn
//	r4:	gtid
//	r5:	tid
//	r6:	argc
//	r7:	p_argv
//	r8:	&exit_frame
//
// locals (relative to r31, the stack pointer value on entry):
//	-8(r31):	saved r31
//	-16(r31):	saved r30 (OMPT_SUPPORT only)
//	-20(r31):	gtid, passed by reference to pkfn
//	-24(r31):	tid, passed by reference to pkfn
//
// return:	r3	(always 1/TRUE)
//
	.text
# if KMP_ARCH_PPC64_ELFv2
	.abiversion 2
# endif
	.globl	__kmp_invoke_microtask

# if KMP_ARCH_PPC64_ELFv2
	.p2align	4
# else
	.p2align	2
# endif

	.type	__kmp_invoke_microtask,@function

# if KMP_ARCH_PPC64_ELFv2
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
	// Global entry point: derive the TOC pointer (r2) from the entry
	// address in r12.
	addis 2, 12, .TOC.-.Lfunc_gep0@ha
	addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
	.localentry	__kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
	// ELFv1: the symbol names a function descriptor in .opd
	// (entry address, TOC base, environment).
	.section	.opd,"aw",@progbits
__kmp_invoke_microtask:
	.p2align	3
	.quad	.Lfunc_begin0
	.quad	.TOC.@tocbase
	.quad	0
	.text
.Lfunc_begin0:
# endif

// -- Begin __kmp_invoke_microtask
// mark_begin;

// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
// to save r30 to hold a copy of r8.

	.cfi_startproc
	mflr 0
	std 31, -8(1)			// save r31 below the caller's stack pointer
	std 0, 16(1)			// save the return address in the caller's frame

// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
	mr 31, 1
	.cfi_def_cfa_register r31
	.cfi_offset r31, -8
	.cfi_offset lr, 16

// Compute the size necessary for the local stack frame.
# if KMP_ARCH_PPC64_ELFv2
	li 12, 72
# else
	li 12, 88
# endif
	sldi 0, 6, 3			// r0 = 8 * argc
	add 12, 0, 12			// r12 = fixed part + 8 * argc
	neg 12, 12			// negative, because the stack grows down

// We need to make sure that the stack frame stays aligned (to 16 bytes).
	li 0, -16
	and 12, 0, 12

// Establish the local stack frame.
// (stdux stores the old r1 as the back chain and updates r1 in one step.)
	stdux 1, 1, 12

# if OMPT_SUPPORT
	.cfi_offset r30, -16
	std 30, -16(31)			// save r30; it will carry exit_frame_ptr
	std 1, 0(8)			// *exit_frame_ptr = new stack pointer
	mr 30, 8
# endif

// Store gtid and tid to the stack because they're passed by reference to the microtask.
	stw 4, -20(31)
	stw 5, -24(31)

	mr 12, 6			// r12 = argc
	mr 4, 7				// r4 = p_argv

// Load p_argv[0..5] into r5..r10, branching to the call as soon as argc is
// exhausted.
	cmpwi 0, 12, 1
	blt 0, .Lcall

	ld 5, 0(4)

	cmpwi 0, 12, 2
	blt 0, .Lcall

	ld 6, 8(4)

	cmpwi 0, 12, 3
	blt 0, .Lcall

	ld 7, 16(4)

	cmpwi 0, 12, 4
	blt 0, .Lcall

	ld 8, 24(4)

	cmpwi 0, 12, 5
	blt 0, .Lcall

	ld 9, 32(4)

	cmpwi 0, 12, 6
	blt 0, .Lcall

	ld 10, 40(4)

	cmpwi 0, 12, 7
	blt 0, .Lcall

// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
	addi 12, 12, -6
	mtctr 12			// CTR = number of parameters left to copy

// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
// 32 + 8*8 == 96 bytes above r1 for ELFv2.
	addi 4, 4, 40
# if KMP_ARCH_PPC64_ELFv2
	addi 12, 1, 88
# else
	addi 12, 1, 104
# endif

.Lnext:
	ldu 0, 8(4)
	stdu 0, 8(12)
	bdnz .Lnext

.Lcall:
# if KMP_ARCH_PPC64_ELFv2
	std 2, 24(1)			// save our TOC pointer across the call
	mr 12, 3			// r12 = pkfn entry address
#else
	std 2, 40(1)			// save our TOC pointer across the call
// For ELFv1, we need to load the actual function address from the function descriptor.
	ld 12, 0(3)
	ld 2, 8(3)
	ld 11, 16(3)
#endif

	addi 3, 31, -20			// 1st parm to pkfn: &gtid
	addi 4, 31, -24			// 2nd parm to pkfn: &tid

	mtctr 12
	bctrl				// call (*pkfn)(...)
# if KMP_ARCH_PPC64_ELFv2
	ld 2, 24(1)			// restore our TOC pointer
# else
	ld 2, 40(1)			// restore our TOC pointer
# endif

# if OMPT_SUPPORT
	li 3, 0
	std 3, 0(30)			// *exit_frame_ptr = 0
# endif

	li 3, 1				// return value is always 1

# if OMPT_SUPPORT
	ld 30, -16(31)
# endif

	mr 1, 31			// restore the entry stack pointer
	ld 0, 16(1)
	ld 31, -8(1)
	mtlr 0
	blr

	.long	0
	.quad	0
.Lfunc_end0:
	.size	__kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
	.cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_PPC64 */
1802
#if KMP_ARCH_RISCV64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
//   t0: used to calculate the dynamic stack size / used to hold pkfn address
//   t1: used as temporary for stack placement calculation
//   t2: used as temporary for stack arguments
//   t3: used as temporary for number of remaining pkfn parms
//   t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//

// Offsets of the locals relative to fp. fp-8 and fp-16 hold the saved ra
// and fp, so the two 4-byte locals sit immediately below them.
__gtid = -20
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	.globl	__kmp_invoke_microtask
	.p2align	1
	.type	__kmp_invoke_microtask,@function
__kmp_invoke_microtask:
	.cfi_startproc

	// First, save ra and fp
	addi	sp, sp, -16
	sd	ra, 8(sp)
	sd	fp, 0(sp)
	addi	fp, sp, 16		// fp = value of sp on entry
	.cfi_def_cfa	fp, 0
	.cfi_offset	ra, -8
	.cfi_offset	fp, -16

	// Compute the dynamic stack size:
	//
	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
	//   reference
	// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
	//   function by register. Given that we have 8 of such registers (a[0-7])
	//   and two + 'argc' arguments (consider &gtid and &tid), we need to
	//   reserve max(0, argc - 6)*8 extra bytes
	//
	// The total number of bytes is then max(0, argc - 6)*8 + 8

	// Compute max(0, argc - 6) using the following bithack:
	// max(0, x) = x - (x & (x >> 31)), where x := argc - 6
	// Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
	addi	t0, a3, -6
	srai	t1, t0, 31
	and	t1, t0, t1
	sub	t0, t0, t1

	addi	t0, t0, 1		// one extra 8-byte slot for gtid and tid

	slli	t0, t0, 3		// slots -> bytes
	sub	sp, sp, t0

	// Align the stack to 16 bytes
	andi	sp, sp, -16

	mv	t0, a0			// t0 = pkfn (a0 is reused for &gtid)
	mv	t3, a3			// t3 = argc (remaining-argument counter)
	mv	t4, a4			// t4 = p_argv

#if OMPT_SUPPORT
	// Save frame pointer into exit_frame
	sd	fp, 0(a5)
#endif

	// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)

	sw	a1, __gtid(fp)
	sw	a2, __tid(fp)

	addi	a0, fp, __gtid
	addi	a1, fp, __tid

	// Load p_argv[0..5] into a2..a7, stopping as soon as argc is exhausted.
	beqz	t3, .L_kmp_3
	ld	a2, 0(t4)

	addi	t3, t3, -1
	beqz	t3, .L_kmp_3
	ld	a3, 8(t4)

	addi	t3, t3, -1
	beqz	t3, .L_kmp_3
	ld	a4, 16(t4)

	addi	t3, t3, -1
	beqz	t3, .L_kmp_3
	ld	a5, 24(t4)

	addi	t3, t3, -1
	beqz	t3, .L_kmp_3
	ld	a6, 32(t4)

	addi	t3, t3, -1
	beqz	t3, .L_kmp_3
	ld	a7, 40(t4)

	// Prepare any additional argument passed through the stack
	addi	t4, t4, 48		// t4 = &p_argv[6]
	mv	t1, sp			// t1 = next outgoing stack-argument slot
	j .L_kmp_2
.L_kmp_1:
	ld	t2, 0(t4)
	sd	t2, 0(t1)
	addi	t4, t4, 8
	addi	t1, t1, 8
.L_kmp_2:
	addi	t3, t3, -1
	bnez	t3, .L_kmp_1

.L_kmp_3:
	// Call pkfn function
	jalr	t0

	// Restore stack and return

	addi	a0, zero, 1		// return value is always 1

	addi	sp, fp, -16		// sp = address of the saved fp/ra pair
	ld	fp, 0(sp)
	ld	ra, 8(sp)
	addi	sp, sp, 16
	ret
.Lfunc_end0:
	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
	.cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_RISCV64 */
1969
#if KMP_ARCH_LOONGARCH64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//           (stored at $fp - 20)
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//           (stored at $fp - 24)
//
// Temp registers:
//
//   t0: used to calculate the dynamic stack size / used to hold pkfn address
//   t1: used as temporary for stack placement calculation
//   t2: used as temporary for stack arguments
//   t3: used as temporary for number of remaining pkfn parms
//   t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	.globl	__kmp_invoke_microtask
	.p2align	2
	.type	__kmp_invoke_microtask,@function
__kmp_invoke_microtask:
	.cfi_startproc

	// First, save ra and fp
	addi.d	$sp, $sp, -16
	st.d	$ra, $sp, 8
	st.d	$fp, $sp, 0
	addi.d	$fp, $sp, 16		// $fp = value of $sp on entry
	// The CFI directives use register numbers: 22 is $fp, 1 is $ra.
	.cfi_def_cfa	22, 0
	.cfi_offset	1, -8
	.cfi_offset	22, -16

	// Compute the dynamic stack size:
	//
	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
	//   reference
	// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
	//   function by register. Given that we have 8 of such registers (a[0-7])
	//   and two + 'argc' arguments (consider &gtid and &tid), we need to
	//   reserve max(0, argc - 6)*8 extra bytes
	//
	// The total number of bytes is then max(0, argc - 6)*8 + 8

	addi.d	$t0, $a3, -6		// t0 = argc - 6
	slt	$t1, $t0, $zero		// t1 = 1 if t0 is negative
	masknez	$t0, $t0, $t1		// t0 = max(0, argc - 6)
	addi.d	$t0, $t0, 1		// one extra 8-byte slot for gtid and tid
	slli.d	$t0, $t0, 3		// slots -> bytes
	sub.d	$sp, $sp, $t0

	// Align the stack to 16 bytes
	bstrins.d	$sp, $zero, 3, 0	// clear bits 3..0 of $sp

	move	$t0, $a0		// t0 = pkfn (a0 is reused for &gtid)
	move	$t3, $a3		// t3 = argc (remaining-argument counter)
	move	$t4, $a4		// t4 = p_argv

#if OMPT_SUPPORT
	// Save frame pointer into exit_frame
	st.d	$fp, $a5, 0
#endif

	// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)

	st.w	$a1, $fp, -20		// spill gtid
	st.w	$a2, $fp, -24		// spill tid

	addi.d	$a0, $fp, -20		// 1st parm to pkfn: &gtid
	addi.d	$a1, $fp, -24		// 2nd parm to pkfn: &tid

	// Load p_argv[0..5] into a2..a7, stopping as soon as argc is exhausted.
	beqz	$t3, .L_kmp_3
	ld.d	$a2, $t4, 0

	addi.d	$t3, $t3, -1
	beqz	$t3, .L_kmp_3
	ld.d	$a3, $t4, 8

	addi.d	$t3, $t3, -1
	beqz	$t3, .L_kmp_3
	ld.d	$a4, $t4, 16

	addi.d	$t3, $t3, -1
	beqz	$t3, .L_kmp_3
	ld.d	$a5, $t4, 24

	addi.d	$t3, $t3, -1
	beqz	$t3, .L_kmp_3
	ld.d	$a6, $t4, 32

	addi.d	$t3, $t3, -1
	beqz	$t3, .L_kmp_3
	ld.d	$a7, $t4, 40

	// Prepare any additional argument passed through the stack
	addi.d	$t4, $t4, 48		// t4 = &p_argv[6]
	move	$t1, $sp		// t1 = next outgoing stack-argument slot
	b	.L_kmp_2
.L_kmp_1:
	ld.d	$t2, $t4, 0
	st.d	$t2, $t1, 0
	addi.d	$t4, $t4, 8
	addi.d	$t1, $t1, 8
.L_kmp_2:
	addi.d	$t3, $t3, -1
	bnez	$t3, .L_kmp_1

.L_kmp_3:
	// Call pkfn function
	jirl	$ra, $t0, 0

	// Restore stack and return

	addi.d	$a0, $zero, 1		// return value is always 1

	addi.d	$sp, $fp, -16		// $sp = address of the saved fp/ra pair
	ld.d	$fp, $sp, 0
	ld.d	$ra, $sp, 8
	addi.d	$sp, $sp, 16
	jr	$ra
.Lfunc_end0:
	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
	.cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_LOONGARCH64 */
2127
#if KMP_ARCH_VE

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   s0: pkfn
//   s1: gtid
//   s2: tid
//   s3: argc
//   s4: p_argv
//   s5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
//   s12: used to hold pkfn address
//   s34: used to calculate the dynamic stack size
//   s35: used as temporary for stack placement calculation
//   s36: used as temporary for stack arguments
//   s37: used as temporary for number of remaining pkfn parms
//   s38: used to traverse p_argv array
//
// return: s0 (always 1/TRUE)
//

// Offsets of the locals relative to %fp.
__gtid = -4
__tid = -8

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	.globl	__kmp_invoke_microtask
	// A function requires 8 bytes align.
	.p2align	3
	.type	__kmp_invoke_microtask,@function
__kmp_invoke_microtask:
	.cfi_startproc

	// First, save fp and lr.  VE stores them at caller stack frame.
	st	%fp, 0(, %sp)
	st	%lr, 8(, %sp)
	or	%fp, 0, %sp
	.cfi_def_cfa	%fp, 0
	.cfi_offset	%lr, 8
	.cfi_offset	%fp, 0

	// Compute the dynamic stack size:
	//
	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
	//   by reference
	// - We need 8 bytes for whole arguments.  We have two + 'argc'
	//   arguments (consider &gtid and &tid).  We need to reserve
	//   (argc + 2) * 8 bytes.
	// - We need 176 bytes for RSA and others
	//
	// The total number of bytes is then (argc + 2) * 8 + 8 + 176.
	//
	// |------------------------------|
	// | return address of callee     | 8(%fp)
	// |------------------------------|
	// | frame pointer of callee      | 0(%fp)
	// |------------------------------| <------------------ %fp
	// | __tid / __gtid               | -8(%fp) / -4(%fp)
	// |------------------------------|
	// | argc+2 for arguments         | 176(%sp)
	// |------------------------------|
	// | RSA                          |
	// |------------------------------|
	// | return address               |
	// |------------------------------|
	// | frame pointer                |
	// |------------------------------| <------------------ %sp

	adds.w.sx	%s34, 2, %s3	// s34 = argc + 2
	sll	%s34, %s34, 3		// slots -> bytes
	lea	%s34, 184(, %s34)	// + 176 (RSA and others) + 8 (gtid/tid)
	subs.l	%sp, %sp, %s34

	// Align the stack to 16 bytes.
	and	%sp, -16, %sp

	// Save pkfn.
	or	%s12, 0, %s0

	// Call host to allocate stack if it is necessary.
	// (Skipped when the new %sp is still at or above the stack limit %sl.)
	brge.l	%sp, %sl, .L_kmp_pass
	ld	%s61, 24(, %tp)
	lea	%s63, 0x13b
	shm.l	%s63, 0(%s61)
	shm.l	%sl, 8(%s61)
	shm.l	%sp, 16(%s61)
	monc

.L_kmp_pass:
	lea	%s35, 176(, %sp)	// s35 = base of the outgoing argument area
	adds.w.sx	%s37, 0, %s3	// s37 = argc
	or	%s38, 0, %s4		// s38 = p_argv

#if OMPT_SUPPORT
	// Save frame pointer into exit_frame.
	st	%fp, 0(%s5)
#endif

	// Prepare arguments for the pkfn function (first 8 using s0-s7
	// registers, but need to store stack also because of varargs).

	stl	%s1, __gtid(%fp)
	stl	%s2, __tid(%fp)

	adds.l	%s0, __gtid, %fp	// 1st parm to pkfn: &gtid
	st	%s0, 0(, %s35)
	adds.l	%s1, __tid, %fp		// 2nd parm to pkfn: &tid
	st	%s1, 8(, %s35)

	// Load p_argv[0..5] into s2..s7 and mirror each into its stack slot,
	// stopping as soon as argc is exhausted.
	breq.l	0, %s37, .L_kmp_call
	ld	%s2, 0(, %s38)
	st	%s2, 16(, %s35)

	breq.l	1, %s37, .L_kmp_call
	ld	%s3, 8(, %s38)
	st	%s3, 24(, %s35)

	breq.l	2, %s37, .L_kmp_call
	ld	%s4, 16(, %s38)
	st	%s4, 32(, %s35)

	breq.l	3, %s37, .L_kmp_call
	ld	%s5, 24(, %s38)
	st	%s5, 40(, %s35)

	breq.l	4, %s37, .L_kmp_call
	ld	%s6, 32(, %s38)
	st	%s6, 48(, %s35)

	breq.l	5, %s37, .L_kmp_call
	ld	%s7, 40(, %s38)
	st	%s7, 56(, %s35)

	breq.l	6, %s37, .L_kmp_call

	// Prepare any additional argument passed through the stack.
	adds.l	%s37, -6, %s37		// s37 = number of arguments left
	lea	%s38, 48(, %s38)	// s38 = &p_argv[6]
	lea	%s35, 64(, %s35)	// s35 = stack slot of the 9th pkfn parameter
.L_kmp_loop:
	ld	%s36, 0(, %s38)
	st	%s36, 0(, %s35)
	adds.l	%s37, -1, %s37
	adds.l	%s38, 8, %s38
	adds.l	%s35, 8, %s35
	brne.l	0, %s37, .L_kmp_loop

.L_kmp_call:
	// Call pkfn function.
	bsic	%lr, (, %s12)

	// Return value.
	lea	%s0, 1

	// Restore stack and return.
	or	%sp, 0, %fp
	ld	%lr, 8(, %sp)
	ld	%fp, 0(, %sp)
	b.l.t	(, %lr)
.Lfunc_end0:
	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
	.cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_VE */
2319
#if KMP_ARCH_S390X

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   r2: pkfn
//   r3: gtid
//   r4: tid
//   r5: argc
//   r6: p_argv
//   SP+160: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
//   r0: used to fetch argv slots
//   r7: used as temporary for number of remaining pkfn parms
//   r8: argv
//   r9: pkfn
//   r10: stack size
//   r11: previous fp
//   r12: stack parameter area
//   r13: argv slot
//
// return: r2 (always 1/TRUE)
//

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	.globl	__kmp_invoke_microtask
	.p2align	1
	.type	__kmp_invoke_microtask,@function
__kmp_invoke_microtask:
	.cfi_startproc

	// Save r6-r14 in the register save area of the caller's frame.
	stmg	%r6,%r14,48(%r15)
	.cfi_offset %r6, -112
	.cfi_offset %r7, -104
	.cfi_offset %r8, -96
	.cfi_offset %r9, -88
	.cfi_offset %r10, -80
	.cfi_offset %r11, -72
	.cfi_offset %r12, -64
	.cfi_offset %r13, -56
	.cfi_offset %r14, -48
	.cfi_offset %r15, -40
	lgr	%r11,%r15		// r11 = stack pointer on entry
	.cfi_def_cfa %r11, 160

	// Compute the dynamic stack size:
	//
	// - 'gtid' and 'tid' are passed to 'pkfn' by reference, so each is
	//   spilled to its own 8-byte slot at the top of the new frame
	//   (16 bytes)
	// - Each argument that cannot be passed to the 'pkfn' function by
	//   register needs 8 bytes. Given that we have 5 of such registers
	//   (r[2-6]) and two + 'argc' arguments (consider &gtid and &tid),
	//   max(0, argc - 3) slots are strictly needed; the code below
	//   reserves max(0, argc - 2) slots, i.e. one spare slot when
	//   argc > 2
	// - 160 bytes remain at the bottom of the frame, below the stack
	//   parameters (presumably the standard s390x register save area --
	//   confirm against the ABI)
	//
	// The total number of bytes is then max(0, argc - 2)*8 + 176

	lgr	%r10,%r5
	aghi	%r10,-2
	jnm	0f			// keep argc - 2 if it is not negative
	lghi	%r10,0
0:
	sllg	%r10,%r10,3		// r10 = max(0, argc - 2) * 8
	lgr	%r12,%r10
	aghi	%r10,176
	sgr	%r15,%r10		// allocate the frame
	agr	%r12,%r15		// r12 + 160 = address of the gtid slot
	stg	%r11,0(%r15)		// store the previous stack pointer (back chain)

	lgr	%r9,%r2			// pkfn

#if OMPT_SUPPORT
	// Save frame pointer into exit_frame
	lg	%r8,160(%r11)
	stg	%r11,0(%r8)
#endif

	// Prepare arguments for the pkfn function (first 5 using r2-r6 registers)

	// gtid and tid are stored as 8-byte values; on this big-endian target
	// the 32-bit int occupies the upper-addressed half, hence the +4.
	stg	%r3,160(%r12)
	la	%r2,164(%r12)		// &gtid
	stg	%r4,168(%r12)
	la	%r3,172(%r12)		// &tid
	lgr	%r8,%r6			// argv

	// If argc > 0
	ltgr	%r7,%r5
	jz	1f

	lg	%r4,0(%r8)		// argv[0]
	aghi	%r7,-1
	jz	1f

	// If argc > 1
	lg	%r5,8(%r8)		// argv[1]
	aghi	%r7,-1
	jz	1f

	// If argc > 2
	lg	%r6,16(%r8)		// argv[2]
	aghi	%r7,-1
	jz	1f

	// If argc > 3: copy the remaining arguments to the stack parameter area
	lghi	%r13,0			// Byte offset 8*n
2:
	lg	%r0,24(%r13,%r8)	// argv[3+n]
	stg	%r0,160(%r13,%r15)	// stack parm[n]
	aghi	%r13,8			// Next
	aghi	%r7,-1
	jnz	2b

1:
	basr	%r14,%r9		// Call pkfn

	// Restore stack and return

	lgr	%r15,%r11
	lmg	%r6,%r14,48(%r15)
	lghi	%r2,1			// return value is always 1
	br	%r14
.Lfunc_end0:
	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
	.cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_S390X */
2472
#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC32
// 32-bit targets: define __kmp_unnamed_critical_addr, a 4-byte data word
// holding the address of the 32-byte common symbol .gomp_critical_user_.
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
	.data
	COMMON .gomp_critical_user_, 32, 3
	.data
	.align 4
	.global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
	.4byte .gomp_critical_user_
#ifdef __ELF__
	.type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),%object
	.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4
#endif
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC32 */
2489
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||                   \
    KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||                 \
    KMP_ARCH_S390X || KMP_ARCH_SPARC64
// 64-bit targets: define __kmp_unnamed_critical_addr, an 8-byte data word
// holding the address of the 32-byte common symbol .gomp_critical_user_.
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
	.data
	COMMON .gomp_critical_user_, 32, 3
	.data
	.align 8
	.global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
	.8byte .gomp_critical_user_
#ifdef __ELF__
	.type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),%object
	.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
          KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||
          KMP_ARCH_S390X || KMP_ARCH_SPARC64 */
2510
// Emit an empty .note.GNU-stack section on Linux. The section type is spelled
// %progbits for ARM/AArch64 and @progbits elsewhere (ARM GNU as treats '@' as
// a comment introducer). Nothing is emitted for WASM.
#if KMP_OS_LINUX
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
.section .note.GNU-stack,"",%progbits
# elif !KMP_ARCH_WASM
.section .note.GNU-stack,"",@progbits
# endif
#endif

// On Linux AArch64 targets, expand the GNU_PROPERTY_BTI_PAC macro (defined
// elsewhere; presumably emits the .note.gnu.property BTI/PAC marking).
#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
GNU_PROPERTY_BTI_PAC
#endif
2522

// source code of openmp/runtime/src/z_Linux_asm.S