1// z_Linux_asm.S: - microtasking routines specifically
2// written for Intel platforms running Linux* OS
3
4//
5////===----------------------------------------------------------------------===//
6////
7//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8//// See https://llvm.org/LICENSE.txt for license information.
9//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10////
11////===----------------------------------------------------------------------===//
12//
13
14// -----------------------------------------------------------------------
15// macros
16// -----------------------------------------------------------------------
17
18#include "kmp_config.h"
19
20#if KMP_ARCH_X86 || KMP_ARCH_X86_64
21
# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
// NOTE: the macro below is commented out, so pause_op is NOT defined when
// KMP_MIC is set.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
// pause_op emits the raw bytes 0xf3,0x90, which is the encoding of the
// PAUSE instruction, so no assembler support for the mnemonic is required.
#  define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC
45
46# if KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
#  define KMP_LABEL(x) L_##x             // form the name of label

// The CFI helper macros expand to nothing in the Darwin configuration.
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro

// ALIGN n: forwards its first argument ($0) unchanged to .align.
.macro ALIGN
        .align $0
.endmacro

// DEBUG_INFO name: emits nothing in the Darwin configuration.
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

// PROC name: aligns (ALIGN 4), exports the underscore-prefixed symbol and
// defines it as a label at the current location.
.macro PROC
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
70# else // KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
# if KMP_MIC
#  define KMP_LABEL(x) L_##x          // local label
# else
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces
# endif // KMP_MIC

// ALIGN size: size is a power-of-two exponent; the macro passes 1<<size to
// .align (for example, ALIGN 4 emits .align 16).
.macro ALIGN size
        .align 1<<(\size)
.endm

// DEBUG_INFO proc: closes the CFI region opened by PROC, pads with .align 16,
// and records the symbol type and size (current location minus proc).
.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        .align 16
        .type  \proc,@function
        .size  \proc,.-\proc
.endm

// PROC proc: aligns (ALIGN 4), exports the symbol, defines its label and
// opens a CFI region with .cfi_startproc.
.macro PROC proc
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm

// Thin wrappers around the corresponding .cfi_* directives.
.macro KMP_CFI_DEF_OFFSET sz
        .cfi_def_cfa_offset \sz
.endm
.macro KMP_CFI_OFFSET reg, sz
        .cfi_offset \reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
        .cfi_def_cfa_register \reg
.endm
.macro KMP_CFI_DEF reg, sz
        .cfi_def_cfa \reg,\sz
.endm
108# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
110
111#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
112
113# if KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
#  define KMP_LABEL(x) L_##x             // form the name of label

// ALIGN n: forwards its first argument ($0) unchanged to .align.
.macro ALIGN
        .align $0
.endmacro

// DEBUG_INFO name: emits nothing in the Darwin configuration.
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

// PROC name: aligns (ALIGN 4), exports the underscore-prefixed symbol and
// defines it as a label at the current location.
.macro PROC
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
132# elif KMP_OS_WINDOWS
#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Windows/ARM64 symbols
// Format labels so that they don't override function names in gdb's backtraces
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

// ALIGN size: passes 1<<size to .align.
.macro ALIGN size
        .align 1<<(\size)
.endm

// DEBUG_INFO proc: only emits alignment padding (ALIGN 2) in this
// configuration; no CFI, .type or .size directives are produced.
.macro DEBUG_INFO proc
        ALIGN 2
.endm

// PROC proc: aligns (ALIGN 2), exports the symbol and defines its label.
.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.endm
150# else // KMP_OS_DARWIN || KMP_OS_WINDOWS
#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

// ALIGN size: passes 1<<size to .align.
.macro ALIGN size
        .align 1<<(\size)
.endm

// DEBUG_INFO proc: closes the CFI region opened by PROC, pads with ALIGN 2
// and records the symbol type and size. The function type marker is
// %function when KMP_ARCH_ARM is set and @function otherwise.
.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        ALIGN 2
#if KMP_ARCH_ARM
        .type  \proc,%function
#else
        .type  \proc,@function
#endif
        .size  \proc,.-\proc
.endm

// PROC proc: aligns (ALIGN 2), exports the symbol, defines its label and
// opens a CFI region with .cfi_startproc.
.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
177# endif // KMP_OS_DARWIN
178
179#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
180
// COMMON name, size, align_power
// Declares a common symbol with .comm. How the alignment argument is passed
// depends on the target OS:
//   Darwin:  no alignment argument is passed
//   Windows: align_power is passed through unchanged
//   other:   1<<align_power is passed
.macro COMMON name, size, align_power
#if KMP_OS_DARWIN
        .comm \name, \size
#elif KMP_OS_WINDOWS
        .comm \name, \size, \align_power
#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
        .comm \name, \size, (1<<(\align_power))
#endif
.endm
190
191// -----------------------------------------------------------------------
192// data
193// -----------------------------------------------------------------------
194
195#ifdef KMP_GOMP_COMPAT
196
197// Support for unnamed common blocks.
198//
199// Because the symbol ".gomp_critical_user_" contains a ".", we have to
200// put this stuff in assembly.
201
# if KMP_ARCH_X86
// IA-32: declare the 32-byte common block .gomp_critical_user_ and define the
// global data word __kmp_unnamed_critical_addr holding its 4-byte address.
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */
222
# if KMP_ARCH_X86_64
// Intel(R) 64: declare the 32-byte common block .gomp_critical_user_ and
// define the global data word __kmp_unnamed_critical_addr holding its 8-byte
// address.
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
// NOTE(review): with the non-Darwin ALIGN macro this expands to .align 1<<8;
// ALIGN 3 (8 bytes) may have been intended -- confirm before changing.
        ALIGN 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */
243
244#endif /* KMP_GOMP_COMPAT */
245
246
247#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
248
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------

        .ident "Intel Corporation"      // identification string for the object
        .data                           // switch to .data and align it; each
        ALIGN 4                         // routine below that needs it selects .text
// void
// __kmp_x86_pause( void );
//
// Executes one pause_op (the PAUSE instruction bytes in non-MIC builds) and
// returns. Takes no arguments and produces no result.

        .text
        PROC  __kmp_x86_pause

        pause_op
        ret

        DEBUG_INFO __kmp_x86_pause
267
268# if !KMP_ASM_INTRINS
269
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p and returns the value *p held before the add.
//
// parameters (on entry):
//      p:      4(%esp)
//      d:      8(%esp)
//
// return:      %eax
// clobbers:    %edx, flags

        PROC  __kmp_test_then_add32

        movl    8(%esp), %eax           // "d"
        movl    4(%esp), %edx           // "p"
        lock
        xaddl   %eax, (%edx)            // %eax <- old *p ; *p <- old *p + d
        ret

        DEBUG_INFO __kmp_test_then_add32
283
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int8
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p and returns the byte previously held there.
//
// parameters (on entry):
//      p:      4(%esp)
//      d:      8(%esp)
//
// return:      %al
// clobbers:    %edx
        PROC  __kmp_xchg_fixed8

        movb    8(%esp), %al            // "d"
        movl    4(%esp), %edx           // "p"

        lock
        xchgb   %al, (%edx)             // %al <- old *p ; *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed8

305
306
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p and returns the 16-bit value previously held
// there.
//
// parameters (on entry):
//      p:      4(%esp)
//      d:      8(%esp)
//
// return:      %ax
// clobbers:    %edx
        PROC  __kmp_xchg_fixed16

        movw    8(%esp), %ax            // "d"
        movl    4(%esp), %edx           // "p"

        lock
        xchgw   %ax, (%edx)             // %ax <- old *p ; *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed16

327
328
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p and returns the 32-bit value previously held
// there.
//
// parameters (on entry):
//      p:      4(%esp)
//      d:      8(%esp)
//
// return:      %eax
// clobbers:    %edx
        PROC  __kmp_xchg_fixed32

        movl    8(%esp), %eax           // "d"
        movl    4(%esp), %edx           // "p"

        lock
        xchgl   %eax, (%edx)            // %eax <- old *p ; *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed32

350
351
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomically: if *p == cv then *p = sv. Returns 1 in %eax when the store
// happened and 0 otherwise.
//
// parameters (on entry): p: 4(%esp)  cv: 8(%esp)  sv: 12(%esp)
// clobbers: %ecx, %edx, flags
        PROC  __kmp_compare_and_store8

        movl    4(%esp), %edx           // "p"
        movb    12(%esp), %cl           // "sv"
        movb    8(%esp), %al            // "cv" (cmpxchg compares against %al)
        lock
        cmpxchgb %cl, (%edx)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag to the full register
        ret

        DEBUG_INFO __kmp_compare_and_store8
366
// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
//
// Atomically: if *p == cv then *p = sv. Returns 1 in %eax when the store
// happened and 0 otherwise.
//
// parameters (on entry): p: 4(%esp)  cv: 8(%esp)  sv: 12(%esp)
// clobbers: %ecx, %edx, flags
        PROC  __kmp_compare_and_store16

        movl    4(%esp), %edx           // "p"
        movw    12(%esp), %cx           // "sv"
        movw    8(%esp), %ax            // "cv" (cmpxchg compares against %ax)
        lock
        cmpxchgw %cx, (%edx)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag to the full register
        ret

        DEBUG_INFO __kmp_compare_and_store16
381
// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
//
// Atomically: if *p == cv then *p = sv. Returns 1 in %eax when the store
// happened and 0 otherwise.
//
// parameters (on entry): p: 4(%esp)  cv: 8(%esp)  sv: 12(%esp)
// clobbers: %ecx, %edx, flags
        PROC  __kmp_compare_and_store32

        movl    4(%esp), %edx           // "p"
        movl    12(%esp), %ecx          // "sv"
        movl    8(%esp), %eax           // "cv" (cmpxchg compares against %eax)
        lock
        cmpxchgl %ecx, (%edx)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag to the full register
        ret

        DEBUG_INFO __kmp_compare_and_store32
396
// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s );
//
// Atomically: if the 64-bit *p == cv then *p = sv. Returns 1 in %eax when the
// store happened and 0 otherwise.
//
// frame layout after the prologue:
//      p:   8(%ebp)    cv: 12(%ebp) low, 16(%ebp) high
//                      sv: 20(%ebp) low, 24(%ebp) high
// clobbers: %ecx, %edx, flags (%ebx and %edi are saved and restored)
        PROC  __kmp_compare_and_store64

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx                    // callee-saved, needed for sv low word
        pushl   %edi                    // callee-saved, used for "p"
        movl    8(%ebp), %edi
        movl    20(%ebp), %ebx          // "sv" low order word
        movl    24(%ebp), %ecx          // "sv" high order word
        movl    12(%ebp), %eax          // "cv" low order word
        movl    16(%ebp), %edx          // "cv" high order word
        lock
        cmpxchg8b (%edi)                // compares %edx:%eax, stores %ecx:%ebx
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag to the full register
        popl    %edi
        popl    %ebx
        leave                           // movl %ebp,%esp ; popl %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64
421
// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
//
// Atomically: if *p == cv then *p = sv. Returns in %al the value *p held
// before the operation (equal to cv exactly when the store happened).
//
// parameters (on entry): p: 4(%esp)  cv: 8(%esp)  sv: 12(%esp)
// clobbers: %ecx, %edx, flags
        PROC  __kmp_compare_and_store_ret8

        movl    4(%esp), %edx           // "p"
        movb    12(%esp), %cl           // "sv"
        movb    8(%esp), %al            // "cv"
        lock
        cmpxchgb %cl, (%edx)            // on mismatch %al <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8
434
// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
//                               kmp_int16 sv);
//
// Atomically: if *p == cv then *p = sv. Returns in %ax the value *p held
// before the operation (equal to cv exactly when the store happened).
//
// parameters (on entry): p: 4(%esp)  cv: 8(%esp)  sv: 12(%esp)
// clobbers: %ecx, %edx, flags
        PROC  __kmp_compare_and_store_ret16

        movl    4(%esp), %edx           // "p"
        movw    12(%esp), %cx           // "sv"
        movw    8(%esp), %ax            // "cv"
        lock
        cmpxchgw %cx, (%edx)            // on mismatch %ax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16
448
// kmp_int32
// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
//                               kmp_int32 sv);
//
// Atomically: if *p == cv then *p = sv. Returns in %eax the value *p held
// before the operation (equal to cv exactly when the store happened).
//
// parameters (on entry): p: 4(%esp)  cv: 8(%esp)  sv: 12(%esp)
// clobbers: %ecx, %edx, flags
        PROC  __kmp_compare_and_store_ret32

        movl    4(%esp), %edx           // "p"
        movl    12(%esp), %ecx          // "sv"
        movl    8(%esp), %eax           // "cv"
        lock
        cmpxchgl %ecx, (%edx)           // on mismatch %eax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32
462
// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
//                               kmp_int64 sv);
//
// Atomically: if the 64-bit *p == cv then *p = sv. Returns in %edx:%eax the
// value *p held before the operation.
//
// frame layout after the prologue:
//      p:   8(%ebp)    cv: 12(%ebp) low, 16(%ebp) high
//                      sv: 20(%ebp) low, 24(%ebp) high
// clobbers: %ecx, flags (%ebx and %edi are saved and restored)
        PROC  __kmp_compare_and_store_ret64

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx                    // callee-saved, needed for sv low word
        pushl   %edi                    // callee-saved, used for "p"
        movl    8(%ebp), %edi
        movl    20(%ebp), %ebx          // "sv" low order word
        movl    24(%ebp), %ecx          // "sv" high order word
        movl    12(%ebp), %eax          // "cv" low order word
        movl    16(%ebp), %edx          // "cv" high order word
        lock
        cmpxchg8b (%edi)                // on mismatch %edx:%eax <- current *p
        popl    %edi
        popl    %ebx
        leave                           // movl %ebp,%esp ; popl %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64

486
487
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores the 32 bits of data into *addr and returns the 32 bits
// that *addr held immediately before the store.
//
// parameters (on entry / after the frame is set up):
//      addr:   4(%esp)  /   8(%ebp)
//      data:   8(%esp)  /  12(%ebp)
//
// return:      x87 %st(0) (old value, loaded with a single flds)
// clobbers:    %eax, flags
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp                // scratch slot at -4(%ebp)
        pushl   %esi                    // callee-saved, used for "addr"

        // After pushl %ebp the return address sits at 4(%ebp), so the
        // arguments are at 8(%ebp) and 12(%ebp).
        movl    8(%ebp), %esi           // "addr"
        movl    12(%ebp), %eax          // bit pattern of "data"

        lock
        xchgl   %eax, (%esi)            // %eax <- old bits of *addr

        // Return the value obtained by the exchange itself, and push exactly
        // one value onto the x87 stack.
        movl    %eax, -4(%ebp)
        flds    -4(%ebp)

        popl    %esi
        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32
526
527# endif /* !KMP_ASM_INTRINS */
528
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// frame layout after the prologue:
//      pkfn:            8(%ebp)
//      gtid:           12(%ebp)
//      tid:            16(%ebp)
//      argc:           20(%ebp)
//      p_argv:         24(%ebp)
//      exit_frame_ptr: 28(%ebp)   (read only when OMPT_SUPPORT is set)
//      argv (local):   -4(%ebp)
//      temp (local):   -8(%ebp)
//      saved %ebx:    -12(%ebp)
//
// return: %eax (always 1)

// -- Begin __kmp_invoke_microtask
// mark_begin;
        PROC  __kmp_invoke_microtask

        pushl   %ebp
        KMP_CFI_DEF_OFFSET 8
        KMP_CFI_OFFSET ebp,-8
        movl    %esp,%ebp               // frame pointer for this routine
        KMP_CFI_REGISTER ebp
        subl    $8,%esp                 // room for the two locals (argv, temp)
        pushl   %ebx                    // callee-saved; lands at -12(%ebp)

#if OMPT_SUPPORT
        movl    28(%ebp),%ebx           // exit_frame_ptr
        movl    %ebp,(%ebx)             // *exit_frame_ptr = frame address
#endif

        // Pre-adjust %esp so that, once (argc+2) words have been pushed, the
        // stack pointer is a multiple of 128 at the call.
        movl    20(%ebp),%ebx           // argc
        leal    8(,%ebx,4),%ebx         // (argc+2)*4 bytes of outgoing args
        movl    %esp,%eax
        subl    %ebx,%eax               // where %esp would end up unaligned
        andl    $127,%eax               // its offset above a 128-byte boundary
        subl    %eax,%esp               // drop %esp by that offset

        movl    24(%ebp),%eax           // p_argv
        movl    %eax,-4(%ebp)           // keep it in the local argv slot

        movl    20(%ebp),%ebx
        shll    $2,%ebx                 // byte offset just past the last argv entry

KMP_LABEL(invoke_2):                    // push argv[argc-1] ... argv[0]
        testl   %ebx,%ebx
        jle     KMP_LABEL(invoke_3)     // nothing (left) to push
        movl    -4(%ebp),%eax
        subl    $4,%ebx                 // step back one entry
        addl    %ebx,%eax               // address of that entry
        pushl   (%eax)

        jmp     KMP_LABEL(invoke_2)
        ALIGN 2
KMP_LABEL(invoke_3):
        leal    16(%ebp),%eax           // push & tid
        pushl   %eax

        leal    12(%ebp),%eax           // push & gtid
        pushl   %eax

        movl    8(%ebp),%ebx
        call    *%ebx                   // call (*pkfn)();

        movl    $1,%eax                 // return 1;

        movl    -12(%ebp),%ebx          // restore %ebx via %ebp (%esp was realigned)
        leave
        KMP_CFI_DEF esp,4
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask
618
619
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the time-stamp counter with rdtsc and returns it unchanged; rdtsc
// leaves the 64-bit count in %edx:%eax (high:low).
        PROC  __kmp_hardware_timestamp
        rdtsc
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp
628
629#endif /* KMP_ARCH_X86 */
630
631
632#if KMP_ARCH_X86_64
633
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
        .ident "Intel Corporation"      // identification string for the object
// --   .file "z_Linux_asm.S"
        .data                           // switch to .data and align it; every
        ALIGN 4                         // routine below selects .text itself
645
// To keep code from being emitted into the .data section, a .text directive
// is placed before every routine definition for x86_64.
648//------------------------------------------------------------------------
649# if !KMP_ASM_INTRINS
650
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p and returns the value *p held before the add.
//
// parameters:
//      p:      %rdi
//      d:      %esi
//
// return:      %eax
// clobbers:    %esi, flags
        .text
        PROC  __kmp_test_then_add32

        lock
        xaddl   %esi, (%rdi)            // %esi <- old *p ; *p <- old *p + d
        movl    %esi, %eax              // hand the old value back
        ret

        DEBUG_INFO __kmp_test_then_add32

671
672
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically adds d to *p and returns the value *p held before the add.
//
// parameters:
//      p:      %rdi
//      d:      %rsi
//
// return:      %rax
// clobbers:    %rsi, flags
        .text
        PROC  __kmp_test_then_add64

        lock
        xaddq   %rsi, (%rdi)            // %rsi <- old *p ; *p <- old *p + d
        movq    %rsi, %rax              // hand the old value back
        ret

        DEBUG_INFO __kmp_test_then_add64

692
693
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int8
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p and returns the byte previously held there.
//
// parameters:
//      p:      %rdi
//      d:      %sil
//
// return:      %al
// clobbers:    %sil
        .text
        PROC  __kmp_xchg_fixed8

        lock
        xchgb   %sil, (%rdi)            // %sil <- old *p ; *p <- d
        movb    %sil, %al               // hand the old value back

        ret

        DEBUG_INFO __kmp_xchg_fixed8

715
716
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p and returns the 16-bit value previously held
// there.
//
// parameters:
//      p:      %rdi
//      d:      %si
//
// return:      %ax
// clobbers:    %si
        .text
        PROC  __kmp_xchg_fixed16

        lock
        xchgw   %si, (%rdi)             // %si <- old *p ; *p <- d
        movw    %si, %ax                // hand the old value back

        ret

        DEBUG_INFO __kmp_xchg_fixed16

737
738
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p and returns the 32-bit value previously held
// there.
//
// parameters:
//      p:      %rdi
//      d:      %esi
//
// return:      %eax
// clobbers:    %esi
        .text
        PROC  __kmp_xchg_fixed32

        lock
        xchgl   %esi, (%rdi)            // %esi <- old *p ; *p <- d
        movl    %esi, %eax              // hand the old value back

        ret

        DEBUG_INFO __kmp_xchg_fixed32

760
761
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically stores d into *p and returns the 64-bit value previously held
// there.
//
// parameters:
//      p:      %rdi
//      d:      %rsi
//
// return:      %rax
// clobbers:    %rsi
        .text
        PROC  __kmp_xchg_fixed64

        lock
        xchgq   %rsi, (%rdi)            // %rsi <- old *p ; *p <- d
        movq    %rsi, %rax              // hand the old value back

        ret

        DEBUG_INFO __kmp_xchg_fixed64

782
783
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomically: if *p == cv then *p = sv. Returns 1 when the store happened
// and 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %sil
//      sv:     %dl
//
// return:      %rax (0 or 1)
// clobbers:    flags
        .text
        PROC  __kmp_compare_and_store8

        movb    %sil, %al               // "cv" (cmpxchg compares against %al)
        lock
        cmpxchgb %dl, (%rdi)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag; also clears upper %rax
        ret

        DEBUG_INFO __kmp_compare_and_store8

807
808
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Atomically: if *p == cv then *p = sv. Returns 1 when the store happened
// and 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %si
//      sv:     %dx
//
// return:      %rax (0 or 1)
// clobbers:    flags
        .text
        PROC  __kmp_compare_and_store16

        movw    %si, %ax                // "cv" (cmpxchg compares against %ax)
        lock
        cmpxchgw %dx, (%rdi)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag; also clears upper %rax
        ret

        DEBUG_INFO __kmp_compare_and_store16

832
833
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Atomically: if *p == cv then *p = sv. Returns 1 when the store happened
// and 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %rax (0 or 1)
// clobbers:    flags
        .text
        PROC  __kmp_compare_and_store32

        movl    %esi, %eax              // "cv" (cmpxchg compares against %eax)
        lock
        cmpxchgl %edx, (%rdi)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag; also clears upper %rax
        ret

        DEBUG_INFO __kmp_compare_and_store32

857
858
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Atomically: if *p == cv then *p = sv. Returns 1 when the store happened
// and 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %rsi
//      sv:     %rdx
//
// return:      %rax (0 or 1)
// clobbers:    flags
        .text
        PROC  __kmp_compare_and_store64

        movq    %rsi, %rax              // "cv" (cmpxchg compares against %rax)
        lock
        cmpxchgq %rdx, (%rdi)
        sete    %al                     // ZF set <=> the exchange took place
        movzbl  %al, %eax               // widen the 0/1 flag; also clears upper %rax
        ret

        DEBUG_INFO __kmp_compare_and_store64
881
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomically: if *p == cv then *p = sv. Returns the value *p held before the
// operation (equal to cv exactly when the store happened).
//
// parameters:
//      p:      %rdi
//      cv:     %sil
//      sv:     %dl
//
// return:      %al (the upper bits of %rax are not written)
        .text
        PROC  __kmp_compare_and_store_ret8

        movb    %sil, %al       // "cv"
        lock
        cmpxchgb %dl,(%rdi)     // on mismatch %al <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8

903
904
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Atomically: if *p == cv then *p = sv. Returns the value *p held before the
// operation (equal to cv exactly when the store happened).
//
// parameters:
//      p:      %rdi
//      cv:     %si
//      sv:     %dx
//
// return:      %ax (the upper bits of %rax are not written)
        .text
        PROC  __kmp_compare_and_store_ret16

        movw    %si, %ax        // "cv"
        lock
        cmpxchgw %dx,(%rdi)     // on mismatch %ax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16

926
927
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Atomically: if *p == cv then *p = sv. Returns the value *p held before the
// operation (equal to cv exactly when the store happened).
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl    %esi, %eax      // "cv"
        lock
        cmpxchgl %edx,(%rdi)    // on mismatch %eax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32

949
950
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Atomically: if *p == cv then *p = sv. Returns the value *p held before the
// operation (equal to cv exactly when the store happened).
//
// parameters:
//      p:      %rdi
//      cv:     %rsi
//      sv:     %rdx
//
// return:      %rax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq    %rsi, %rax      // "cv"
        lock
        cmpxchgq %rdx,(%rdi)    // on mismatch %rax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64
971
972# endif /* !KMP_ASM_INTRINS */
973
974
975# if !KMP_MIC
976
977# if !KMP_ASM_INTRINS
978
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores the 32 bits of data into *addr and returns the 32 bits
// previously held there. The value is moved through %eax because the
// exchange is done with the integer xchgl instruction.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 4 bytes)
//
// return:      %xmm0 (lower 4 bytes)
// clobbers:    %eax
        .text
        PROC  __kmp_xchg_real32

        movd    %xmm0, %eax     // load "data" to eax

        lock
        xchgl   %eax, (%rdi)    // %eax <- old *addr ; *addr <- data

        movd    %eax, %xmm0     // load old value into return register

        ret

        DEBUG_INFO __kmp_xchg_real32

1003
1004
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// Atomically stores the 64 bits of data into *addr and returns the 64 bits
// previously held there. The value is moved through %rax because the
// exchange is done with the integer xchgq instruction.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 8 bytes)
//
// return:      %xmm0 (lower 8 bytes)
// clobbers:    %rax
        .text
        PROC  __kmp_xchg_real64

        movd    %xmm0, %rax     // load "data" to rax (movd with a 64-bit register)

        lock
        xchgq   %rax, (%rdi)    // %rax <- old *addr ; *addr <- data

        movd    %rax, %xmm0     // load old value into return register
        ret

        DEBUG_INFO __kmp_xchg_real64

1027
1028
# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */
1032
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
//      %rdi:   pkfn
//      %esi:   gtid
//      %edx:   tid
//      %ecx:   argc
//      %r8:    p_argv
//      %r9:    &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//      %rax:   used all over the place
//      %rdx:   used in stack pointer alignment calculation
//      %r11:   used to traverse p_argv array
//      %rsi:   used as temporary for stack parameters
//              used as temporary for number of pkfn parms to push
//      %rbx:   used to hold pkfn address, and zero constant, callee-save
//
// return:      %eax    (always 1/TRUE)
//
// __gtid and __tid are %rbp-relative offsets of the pushed copies; they follow
// from the push order below: %rbx at -8, gtid at -16, tid at -24.
__gtid = -16
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC  __kmp_invoke_microtask

        pushq   %rbp            // save base pointer
        KMP_CFI_DEF_OFFSET 16
        KMP_CFI_OFFSET rbp,-16
        movq    %rsp,%rbp       // establish the base pointer for this routine.
        KMP_CFI_REGISTER rbp

#if OMPT_SUPPORT
        movq    %rbp, (%r9)     // save exit_frame
#endif

        pushq   %rbx            // %rbx is callee-saved register
        pushq   %rsi            // Put gtid on stack so can pass &gtid to pkfn
        pushq   %rdx            // Put tid on stack so can pass &tid to pkfn

        movq    %rcx, %rax      // Stack alignment calculation begins; argc -> %rax
        movq    $0, %rbx        // constant for cmovs later
        subq    $4, %rax        // subtract four args passed in registers to pkfn
#if KMP_MIC
        // branch-based equivalent of the cmovsq in the #else arm
        js      KMP_LABEL(kmp_0)        // jump to movq
        jmp     KMP_LABEL(kmp_0_exit)   // jump ahead
KMP_LABEL(kmp_0):
        movq    %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
        cmovsq  %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC

        movq    %rax, %rsi      // save max(0, argc-4) -> %rsi for later
        shlq    $3, %rax        // Number of bytes used on stack: max(0, argc-4)*8

        movq    %rsp, %rdx      //
        subq    %rax, %rdx      // %rsp-(max(0,argc-4)*8) -> %rdx --
                                // without align, stack ptr would be this
        movq    %rdx, %rax      // Save to %rax

        andq    $0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align)
        subq    %rax, %rdx      // Amount to subtract from %rsp
        subq    %rdx, %rsp      // Prepare the stack ptr --
                                // now %rsp will align to 128-byte boundary at call site

                                // setup pkfn parameter reg and stack
        movq    %rcx, %rax      // argc -> %rax
        cmpq    $0, %rsi
        je      KMP_LABEL(kmp_invoke_pass_parms)        // jump ahead if no parms to push
        shlq    $3, %rcx        // argc*8 -> %rcx
        movq    %r8, %rdx       // p_argv -> %rdx
        addq    %rcx, %rdx      // &p_argv[argc] -> %rdx

        movq    %rsi, %rcx      // max (0, argc-4) -> %rcx

KMP_LABEL(kmp_invoke_push_parms):
        // push nth - 7th parms to pkfn on stack
        subq    $8, %rdx        // decrement p_argv pointer to previous parm
        movq    (%rdx), %rsi    // p_argv[%rcx-1] -> %rsi
        pushq   %rsi            // push p_argv[%rcx-1] onto stack (reverse order)
        subl    $1, %ecx        // one fewer parm left to push; jecxz tests %ecx

// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
//		if the name of the label that is an operand of this jecxz starts with a dot (".");
//	   Apple's linker does not support 1-byte length relocation;
//         Resolution: replace all .labelX entries with L_labelX.

        jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left
        jmp     KMP_LABEL(kmp_invoke_push_parms)
        ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms):       // put 1st - 6th parms to pkfn in registers.
                                // order here is important to avoid trashing
                                // registers used for both input and output parms!
        movq    %rdi, %rbx      // pkfn -> %rbx
        leaq    __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
        leaq    __tid(%rbp), %rsi  // &tid -> %rsi (store 2nd parm to pkfn)

        movq    %r8, %r11       // p_argv -> %r11

#if KMP_MIC
        // branch-based equivalents of the cmovnsq sequence in the #else arm
        cmpq    $4, %rax        // argc >= 4?
        jns     KMP_LABEL(kmp_4)        // jump to movq
        jmp     KMP_LABEL(kmp_4_exit)   // jump ahead
KMP_LABEL(kmp_4):
        movq    24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)
KMP_LABEL(kmp_4_exit):

        cmpq    $3, %rax        // argc >= 3?
        jns     KMP_LABEL(kmp_3)        // jump to movq
        jmp     KMP_LABEL(kmp_3_exit)   // jump ahead
KMP_LABEL(kmp_3):
        movq    16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)
KMP_LABEL(kmp_3_exit):

        cmpq    $2, %rax        // argc >= 2?
        jns     KMP_LABEL(kmp_2)        // jump to movq
        jmp     KMP_LABEL(kmp_2_exit)   // jump ahead
KMP_LABEL(kmp_2):
        movq    8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)
KMP_LABEL(kmp_2_exit):

        cmpq    $1, %rax        // argc >= 1?
        jns     KMP_LABEL(kmp_1)        // jump to movq
        jmp     KMP_LABEL(kmp_1_exit)   // jump ahead
KMP_LABEL(kmp_1):
        movq    (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
KMP_LABEL(kmp_1_exit):
#else
        cmpq    $4, %rax        // argc >= 4?
        cmovnsq 24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)

        cmpq    $3, %rax        // argc >= 3?
        cmovnsq 16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)

        cmpq    $2, %rax        // argc >= 2?
        cmovnsq 8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)

        cmpq    $1, %rax        // argc >= 1?
        cmovnsq (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC

        call    *%rbx           // call (*pkfn)();
        movq    $1, %rax        // move 1 into return register;

        movq    -8(%rbp), %rbx  // restore %rbx	using %rbp since %rsp was modified
        movq    %rbp, %rsp      // restore stack pointer
        popq    %rbp            // restore frame pointer
        KMP_CFI_DEF rsp,8
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask
1209
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the time-stamp counter with rdtsc, which delivers the count split
// across %edx (high half) and %eax (low half), and merges the halves into a
// single 64-bit result.
//
// return:      %rax
// clobbers:    %rdx, flags
        .text
        PROC  __kmp_hardware_timestamp
        rdtsc
        shlq    $32, %rdx       // move the high half into bits 63..32
        orq     %rdx, %rax      // combine with the low half
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp
1221
//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
//
// Returns the bit index of the most significant set bit of the argument
// (bit-scan-reverse).
//
// parameters:
//      value:  %edi
//
// return:      %eax
//
// NOTE(review): bsr leaves its destination undefined when the source is 0,
// so callers presumably never pass 0 -- confirm against the call sites.
        .text
        PROC  __kmp_bsr32

        bsr     %edi,%eax
        ret

        DEBUG_INFO __kmp_bsr32
1234
1235// -----------------------------------------------------------------------
1236#endif /* KMP_ARCH_X86_64 */
1237
1238// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// incoming registers:
//   x0: pkfn
//   w1: gtid
//   w2: tid
//   w3: argc
//   x4: p_argv
//   x5: &exit_frame
//
// stack locals (addressed downwards from x29):
//   __gtid: copy of gtid, so that &gtid can be handed to pkfn
//   __tid:  copy of tid, so that &tid can be handed to pkfn
//
// scratch registers:
//   x8:  pkfn address, kept while x0-x7 are refilled
//   w9:  allocation size first, then count of p_argv entries still to place
//   x10: cursor into p_argv
//   x11: cursor into the outgoing stack-argument area
//   x12: carries one stack argument from p_argv to the stack
//   x19: exit_frame_ptr across the call (callee-saved, OMPT only)
//
// return: w0 (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        PROC    __kmp_invoke_microtask

        // Save the frame record; with OMPT also save x19/x20 so that x19 can
        // hold exit_frame_ptr across the call. x29 then marks the frame base.
        stp     x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
        stp     x19, x20, [sp, #-16]!
# endif
        mov     x29, sp

        // Reserve (1 + argc/2) * 16 bytes: room for gtid/tid directly below
        // x29 plus the outgoing stack arguments, in 16-byte steps.
        mov     w9, #1
        add     w9, w9, w3, lsr #1
        sub     sp, sp, w9, uxtw #4
        mov     x11, sp                 // stack arguments start at the new sp

        // Move the inputs out of the registers that are about to be reused.
        mov     x8, x0
        mov     x10, x4
        mov     w9, w3
        str     w1, [x29, #-__gtid]
        str     w2, [x29, #-__tid]
# if OMPT_SUPPORT
        mov     x19, x5
        str     x29, [x19]              // *exit_frame_ptr = frame base
# endif

        // First two pkfn arguments: &gtid and &tid.
        sub     x0, x29, #__gtid
        sub     x1, x29, #__tid

        // p_argv[0..5] go to x2..x7 for as long as entries remain.
        cbz     w9, KMP_LABEL(kmp_call)
        ldr     x2, [x10]

        subs    w9, w9, #1
        b.eq    KMP_LABEL(kmp_call)
        ldr     x3, [x10, #8]

        subs    w9, w9, #1
        b.eq    KMP_LABEL(kmp_call)
        ldr     x4, [x10, #16]

        subs    w9, w9, #1
        b.eq    KMP_LABEL(kmp_call)
        ldr     x5, [x10, #24]

        subs    w9, w9, #1
        b.eq    KMP_LABEL(kmp_call)
        ldr     x6, [x10, #32]

        subs    w9, w9, #1
        b.eq    KMP_LABEL(kmp_call)
        ldr     x7, [x10, #40]

        // Everything from p_argv[6] onwards is copied to the stack.
        add     x10, x10, #48
KMP_LABEL(kmp_spill):
        subs    w9, w9, #1
        b.eq    KMP_LABEL(kmp_call)
        ldr     x12, [x10], #8
        str     x12, [x11], #8
        b       KMP_LABEL(kmp_spill)

KMP_LABEL(kmp_call):
        blr     x8
        mov     w0, #1                  // return value is always 1
        mov     sp, x29                 // drop the dynamic allocation
# if OMPT_SUPPORT
        str     xzr, [x19]              // *exit_frame_ptr = 0
        ldp     x19, x20, [sp], #16
# endif
        ldp     x29, x30, [sp], #16
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */
1364
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// incoming arguments:
//   r0: pkfn
//   r1: gtid
//   r2: tid
//   r3: argc
//   stack, loaded into r4: p_argv
//   stack, loaded into r5: &exit_frame
//
// stack locals:
//   __gtid: copy of gtid, so that &gtid can be handed to pkfn
//   __tid:  copy of tid, so that &tid can be handed to pkfn
//
// scratch registers:
//   r4:  p_argv on entry, afterwards the pkfn address
//   r5:  allocation size first, then count of p_argv entries still to place
//   r6:  cursor into p_argv
//   r7:  frame pointer (in some configurations)
//   r8:  base of the callee-saved area first, then cursor into the outgoing
//        stack-argument area
//   r10: exit_frame_ptr across the call (callee-saved)
//   r11: frame pointer (in some configurations)
//   r12: carries one stack argument from p_argv to the stack
//
// return: r0 (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        PROC    __kmp_invoke_microtask

        // r3 is pushed only as filler: ten registers keep the stack aligned
        // for the call to pkfn below.
        push    {r3-r11,lr}
        // The fifth and sixth arguments sit right above the ten saved words.
        ldr     r4, [sp, #10*4]
# if OMPT_SUPPORT
        ldr     r5, [sp, #11*4]
# endif

        // FP is r7 or r11 depending on the configuration; FPOFF is the
        // offset of that register inside the block pushed above.
# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
#  define FP r7
#  define FPOFF 4*4
# else
#  define FP r11
#  define FPOFF 8*4
# endif
        add     FP, sp, #FPOFF
# if OMPT_SUPPORT
        mov     r10, r5
        str     FP, [r10]               // *exit_frame_ptr = frame pointer
# endif
        mov     r8, sp                  // r8 = base of the callee-saved area

        // Stack to reserve, in steps of 8 bytes. Only 4*(argc-2) bytes are
        // strictly required (two arguments travel in r2/r3), but 4*argc is
        // used so that argc < 2 needs no special case. Rounding down to a
        // multiple of 8 is fine because of that surplus. One further
        // 8-byte unit holds gtid and tid.
        mov     r5, #1
        add     r5, r5, r3, lsr #1
        sub     sp, sp, r5, lsl #3

        // gtid and tid live directly below the callee-saved area.
        str     r1, [r8, #-__gtid]
        str     r2, [r8, #-__tid]
        mov     r5, r3                  // r5 = argc
        mov     r6, r4                  // r6 = p_argv
        mov     r4, r0                  // r4 = pkfn

        // First two pkfn arguments: &gtid and &tid.
        sub     r0, r8, #__gtid
        sub     r1, r8, #__tid

        mov     r8, sp                  // stack arguments start at the new sp

        // p_argv[0] and p_argv[1] go to r2 and r3 when they exist.
        cmp     r5, #0
        beq     KMP_LABEL(kmp_call)
        ldr     r2, [r6]

        subs    r5, r5, #1
        beq     KMP_LABEL(kmp_call)
        ldr     r3, [r6, #4]

        // Everything from p_argv[2] onwards is copied to the stack.
        add     r6, r6, #8
KMP_LABEL(kmp_spill):
        subs    r5, r5, #1
        beq     KMP_LABEL(kmp_call)
        ldr     r12, [r6], #4
        str     r12, [r8], #4
        b       KMP_LABEL(kmp_spill)

KMP_LABEL(kmp_call):
        blx     r4
        mov     r0, #1                  // return value is always 1

        // Recover the post-push stack pointer from the frame pointer.
        sub     r4, FP, #FPOFF
        mov     sp, r4
# undef FP
# undef FPOFF

# if OMPT_SUPPORT
        mov     r1, #0
        str     r1, [r10]               // *exit_frame_ptr = 0
# endif
        pop     {r3-r11,pc}

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */
1509
#if KMP_ARCH_PPC64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// incoming registers:
//   r3: pkfn
//   r4: gtid
//   r5: tid
//   r6: argc
//   r7: p_argv
//   r8: &exit_frame
//
// registers used internally:
//   r0:  scratch (link register value, alignment mask, copy temporary)
//   r12: frame size, then argc, then store cursor, finally the call target
//   r30: exit_frame_ptr across the call (OMPT only)
//   r31: copy of the entry stack pointer, base for the local save slots
//
// return: r3 (always 1/TRUE)
//
        .text
# if KMP_ARCH_PPC64_ELFv2
        .abiversion 2
# endif
        .globl  __kmp_invoke_microtask
        .type   __kmp_invoke_microtask,@function

# if KMP_ARCH_PPC64_ELFv2
        // ELFv2: the global entry point derives the TOC pointer from r12,
        // the local entry point follows it.
        .p2align 4
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
        addis   2, 12, .TOC.-.Lfunc_gep0@ha
        addi    2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
        .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
        // ELFv1: the public symbol names a function descriptor in .opd; the
        // code itself starts at .Lfunc_begin0 in .text.
        .p2align 2
        .section .opd,"aw",@progbits
__kmp_invoke_microtask:
        .p2align 3
        .quad   .Lfunc_begin0
        .quad   .TOC.@tocbase
        .quad   0
        .text
.Lfunc_begin0:
# endif

// -- Begin  __kmp_invoke_microtask
// mark_begin;

// The stack frame has to hold every parameter of the microtask plus what
// this function needs for itself: 48 bytes under the ELFv1 ABI (32 bytes
// under ELFv2), 8*(2 + argc) bytes of microtask parameters, 8 bytes for the
// values of r4 and r5, and 8 bytes for r31. The constants below also cover
// another 8 bytes in which r30 is saved when OMP-T support needs it to keep
// a copy of r8.

        .cfi_startproc
        mflr    0
        std     31, -8(1)
        std     0, 16(1)

// Normally r31 would be set from r1 once the frame exists. The frame size is
// computed at run time here, so r31 instead keeps the entry value of r1; it
// is the base for our save slots and restores r1 before returning.
        mr      31, 1
        .cfi_def_cfa_register r31
        .cfi_offset r31, -8
        .cfi_offset lr, 16

// Frame size: fixed part plus 8 bytes per p_argv entry, negated for stdux.
        sldi    0, 6, 3
# if KMP_ARCH_PPC64_ELFv2
        li      12, 72
# else
        li      12, 88
# endif
        add     12, 0, 12
        neg     12, 12

// Keep the frame a multiple of 16 bytes.
        li      0, -16
        and     12, 0, 12

// Allocate the frame; stdux also writes the back chain.
        stdux   1, 1, 12

# if OMPT_SUPPORT
        .cfi_offset r30, -16
        std     30, -16(31)
        std     1, 0(8)                 // *exit_frame_ptr = new stack pointer
        mr      30, 8
# endif

// gtid and tid are passed to the microtask by reference, so they need homes
// in memory.
        stw     4, -20(31)
        stw     5, -24(31)

        mr      12, 6                   // r12 = argc
        mr      4, 7                    // r4  = p_argv

// p_argv[0..5] go to r5..r10 for as long as argc allows.
        cmpwi   12, 1
        blt     .Lkmp_call
        ld      5, 0(4)

        cmpwi   12, 2
        blt     .Lkmp_call
        ld      6, 8(4)

        cmpwi   12, 3
        blt     .Lkmp_call
        ld      7, 16(4)

        cmpwi   12, 4
        blt     .Lkmp_call
        ld      8, 24(4)

        cmpwi   12, 5
        blt     .Lkmp_call
        ld      9, 32(4)

        cmpwi   12, 6
        blt     .Lkmp_call
        ld      10, 40(4)

        cmpwi   12, 7
        blt     .Lkmp_call

// More than 6 microtask parameters: the remaining argc - 6 go to the stack.
        addi    12, 12, -6
        mtctr   12

// Both cursors start 8 bytes before the first element, because the loop
// uses update-form load and store. The microtask parameter save area begins
// 48 + 8*8 == 112 bytes above r1 for ELFv1 and 32 + 8*8 == 96 bytes above r1
// for ELFv2.
        addi    4, 4, 40
# if KMP_ARCH_PPC64_ELFv2
        addi    12, 1, 88
# else
        addi    12, 1, 104
# endif

.Lkmp_copy:
        ldu     0, 8(4)
        stdu    0, 8(12)
        bdnz    .Lkmp_copy

.Lkmp_call:
# if KMP_ARCH_PPC64_ELFv2
        std     2, 24(1)                // park our TOC pointer
        mr      12, 3                   // call target = pkfn
# else
        std     2, 40(1)                // park our TOC pointer
// ELFv1: pkfn points at a function descriptor; take the entry address and
// the two further descriptor doublewords from it.
        ld      12, 0(3)
        ld      2, 8(3)
        ld      11, 16(3)
# endif

        addi    3, 31, -20              // r3 = &gtid
        addi    4, 31, -24              // r4 = &tid

        mtctr   12
        bctrl
# if KMP_ARCH_PPC64_ELFv2
        ld      2, 24(1)                // reload our TOC pointer
# else
        ld      2, 40(1)                // reload our TOC pointer
# endif

# if OMPT_SUPPORT
        li      3, 0
        std     3, 0(30)                // *exit_frame_ptr = 0
# endif

        li      3, 1                    // return value is always 1

# if OMPT_SUPPORT
        ld      30, -16(31)
# endif

        mr      1, 31                   // back to the entry stack pointer
        ld      0, 16(1)
        ld      31, -8(1)
        mtlr    0
        blr

// NOTE(review): these two zero words after the return are kept unchanged;
// their purpose is not evident from this file.
        .long   0
        .quad   0
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_PPC64 */
1737
#if KMP_ARCH_RISCV64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Incoming registers:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Stack locals (offsets from fp):
//   __gtid: copy of gtid, so that &gtid can be handed to pkfn
//   __tid:  copy of tid, so that &tid can be handed to pkfn
//
// Scratch registers:
//   t0: dynamic stack size first, afterwards the pkfn address
//   t1: sign mask for the size computation, later cursor into the outgoing
//       stack-argument area
//   t2: carries one stack argument from p_argv to the stack
//   t3: count of p_argv entries still to place
//   t4: cursor into p_argv
//
// return: a0 (always 1/TRUE)
//

__gtid = -20
__tid = -24

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align 1
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // Push ra and fp; fp then points just above the two saved words.
        addi    sp, sp, -16
        sd      ra, 8(sp)
        sd      fp, 0(sp)
        addi    fp, sp, 16
        .cfi_def_cfa fp, 0
        .cfi_offset ra, -8
        .cfi_offset fp, -16

        // Dynamic stack size:
        //
        // - 8 bytes hold 'gtid' and 'tid' so that they can be passed by
        //   reference
        // - 8 bytes for every 'pkfn' argument that does not fit in a
        //   register. Eight argument registers (a[0-7]) serve two + 'argc'
        //   arguments (&gtid and &tid come first), which leaves
        //   max(0, argc - 6) arguments for the stack
        //
        // Total: max(0, argc - 6)*8 + 8 bytes

        // max(0, x) = x - (x & (x >> 31)), with x := argc - 6
        // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
        addi    t0, a3, -6
        srai    t1, t0, 31
        and     t1, t0, t1
        sub     t0, t0, t1

        addi    t0, t0, 1               // one more slot for gtid/tid
        slli    t0, t0, 3               // slots -> bytes
        sub     sp, sp, t0

        // Round sp down to a 16-byte boundary.
        andi    sp, sp, -16

        // Keep the inputs out of the way of the argument registers.
        mv      t4, a4
        mv      t3, a3
        mv      t0, a0

#if OMPT_SUPPORT
        // *exit_frame_ptr = frame pointer
        sd      fp, 0(a5)
#endif

        // Arguments for pkfn: the first eight travel in a0-a7.
        sw      a1, __gtid(fp)
        sw      a2, __tid(fp)

        addi    a0, fp, __gtid
        addi    a1, fp, __tid

        // p_argv[0..5] go to a2..a7 for as long as entries remain.
        beqz    t3, .L_kmp_call
        ld      a2, 0(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_call
        ld      a3, 8(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_call
        ld      a4, 16(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_call
        ld      a5, 24(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_call
        ld      a6, 32(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_call
        ld      a7, 40(t4)

        // Everything from p_argv[6] onwards is copied to the stack,
        // starting at sp.
        addi    t4, t4, 48
        mv      t1, sp
.L_kmp_spill:
        addi    t3, t3, -1
        beqz    t3, .L_kmp_call
        ld      t2, 0(t4)
        sd      t2, 0(t1)
        addi    t4, t4, 8
        addi    t1, t1, 8
        j       .L_kmp_spill

.L_kmp_call:
        // Invoke pkfn.
        jalr    t0

        // Return 1 and unwind the frame through fp.
        li      a0, 1

        addi    sp, fp, -16
        ld      fp, 0(sp)
        ld      ra, 8(sp)
        addi    sp, sp, 16
        ret
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_RISCV64 */
1904
#if KMP_ARCH_LOONGARCH64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Incoming registers:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Stack locals:
//   gtid copy at fp - 20, so that &gtid can be handed to pkfn
//   tid copy at fp - 24, so that &tid can be handed to pkfn
//
// Scratch registers:
//   t0: dynamic stack size first, afterwards the pkfn address
//   t1: "argc - 6 is negative" flag, later cursor into the outgoing
//       stack-argument area
//   t2: carries one stack argument from p_argv to the stack
//   t3: count of p_argv entries still to place
//   t4: cursor into p_argv
//
// return: a0 (always 1/TRUE)
//

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align 2
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // Push ra and fp; fp then points just above the two saved words.
        // The CFI directives use register numbers: 22 for fp, 1 for ra.
        addi.d  $sp, $sp, -16
        st.d    $ra, $sp, 8
        st.d    $fp, $sp, 0
        addi.d  $fp, $sp, 16
        .cfi_def_cfa 22, 0
        .cfi_offset 1, -8
        .cfi_offset 22, -16

        // Dynamic stack size:
        //
        // - 8 bytes hold 'gtid' and 'tid' so that they can be passed by
        //   reference
        // - 8 bytes for every 'pkfn' argument that does not fit in a
        //   register. Eight argument registers (a[0-7]) serve two + 'argc'
        //   arguments (&gtid and &tid come first), which leaves
        //   max(0, argc - 6) arguments for the stack
        //
        // Total: max(0, argc - 6)*8 + 8 bytes

        addi.d  $t0, $a3, -6
        slt     $t1, $t0, $zero         // t1 = 1 when argc - 6 is negative
        masknez $t0, $t0, $t1           // negative counts become 0
        addi.d  $t0, $t0, 1             // one more slot for gtid/tid
        slli.d  $t0, $t0, 3             // slots -> bytes
        sub.d   $sp, $sp, $t0

        // Round sp down to a 16-byte boundary by clearing bits 3..0.
        bstrins.d $sp, $zero, 3, 0

        // Keep the inputs out of the way of the argument registers.
        move    $t4, $a4
        move    $t3, $a3
        move    $t0, $a0

#if OMPT_SUPPORT
        // *exit_frame_ptr = frame pointer
        st.d    $fp, $a5, 0
#endif

        // Arguments for pkfn: the first eight travel in a0-a7.
        st.w    $a1, $fp, -20
        st.w    $a2, $fp, -24

        addi.d  $a0, $fp, -20
        addi.d  $a1, $fp, -24

        // p_argv[0..5] go to a2..a7 for as long as entries remain.
        beqz    $t3, .L_kmp_call
        ld.d    $a2, $t4, 0

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_call
        ld.d    $a3, $t4, 8

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_call
        ld.d    $a4, $t4, 16

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_call
        ld.d    $a5, $t4, 24

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_call
        ld.d    $a6, $t4, 32

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_call
        ld.d    $a7, $t4, 40

        // Everything from p_argv[6] onwards is copied to the stack,
        // starting at sp.
        addi.d  $t4, $t4, 48
        move    $t1, $sp
.L_kmp_spill:
        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_call
        ld.d    $t2, $t4, 0
        st.d    $t2, $t1, 0
        addi.d  $t4, $t4, 8
        addi.d  $t1, $t1, 8
        b       .L_kmp_spill

.L_kmp_call:
        // Invoke pkfn.
        jirl    $ra, $t0, 0

        // Return 1 and unwind the frame through fp.
        ori     $a0, $zero, 1

        addi.d  $sp, $fp, -16
        ld.d    $fp, $sp, 0
        ld.d    $ra, $sp, 8
        addi.d  $sp, $sp, 16
        jr      $ra
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_LOONGARCH64 */
2062
#if KMP_ARCH_VE

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Incoming registers:
//   s0: pkfn
//   s1: gtid
//   s2: tid
//   s3: argc
//   s4: p_argv
//   s5: exit_frame_ptr
//
// Stack locals (offsets from fp):
//   __gtid: copy of gtid, so that &gtid can be handed to pkfn
//   __tid:  copy of tid, so that &tid can be handed to pkfn
//
// Scratch registers:
//   s12: pkfn address, kept while s0-s7 are refilled
//   s34: dynamic stack size
//   s35: cursor into the outgoing argument area
//   s36: carries one stack argument from p_argv to the stack
//   s37: argc, later the count of entries left for the copy loop
//   s38: cursor into p_argv
//   s61, s63: scratch of the stack-extension request
//
// return: s0 (always 1/TRUE)
//

__gtid = -4
__tid = -8

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        // Functions must be aligned to 8 bytes.
        .p2align 3
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // Save fp and lr first. On VE they are stored in the frame of the
        // caller.
        st      %fp, 0(, %sp)
        st      %lr, 8(, %sp)
        or      %fp, 0, %sp
        .cfi_def_cfa %fp, 0
        .cfi_offset %lr, 8
        .cfi_offset %fp, 0

        // Dynamic stack size:
        //
        // - 8 bytes hold 'gtid' and 'tid' so that they can be passed by
        //   reference
        // - 8 bytes for every argument. There are two + 'argc' of them
        //   (consider &gtid and &tid), so (argc + 2) * 8 bytes are reserved.
        // - 176 bytes for the RSA and others
        //
        // Total: (argc + 2) * 8 + 8 + 176 bytes.
        //
        // |------------------------------|
        // | return address of callee     | 8(%fp)
        // |------------------------------|
        // | frame pointer of callee      | 0(%fp)
        // |------------------------------| <------------------ %fp
        // | __tid / __gtid               | -8(%fp) / -4(%fp)
        // |------------------------------|
        // | argc+2 for arguments         | 176(%sp)
        // |------------------------------|
        // | RSA                          |
        // |------------------------------|
        // | return address               |
        // |------------------------------|
        // | frame pointer                |
        // |------------------------------| <------------------ %sp

        adds.w.sx %s34, 2, %s3
        sll     %s34, %s34, 3
        lea     %s34, 184(, %s34)
        subs.l  %sp, %sp, %s34

        // Round sp down to a 16-byte boundary.
        and     %sp, -16, %sp

        // Keep pkfn; s0 is about to become an argument register.
        or      %s12, 0, %s0

        // Ask the host for more stack when sp dropped below the limit in sl.
        brge.l  %sp, %sl, .L_kmp_stack_ok
        ld      %s61, 24(, %tp)
        lea     %s63, 0x13b
        shm.l   %s63, 0(%s61)
        shm.l   %sl, 8(%s61)
        shm.l   %sp, 16(%s61)
        monc

.L_kmp_stack_ok:
        adds.w.sx %s37, 0, %s3
        or      %s38, 0, %s4
        lea     %s35, 176(, %sp)

#if OMPT_SUPPORT
        // *exit_frame_ptr = frame pointer
        st      %fp, 0(%s5)
#endif

        // Arguments for pkfn: the first eight travel in s0-s7, and because
        // of varargs each one is written to the stack area as well.

        stl     %s1, __gtid(%fp)
        stl     %s2, __tid(%fp)

        adds.l  %s0, __gtid, %fp
        st      %s0, 0(, %s35)
        adds.l  %s1, __tid, %fp
        st      %s1, 8(, %s35)

        // p_argv[0..5] go to s2..s7 for as long as argc allows.
        breq.l  0, %s37, .L_kmp_invoke
        ld      %s2, 0(, %s38)
        st      %s2, 16(, %s35)

        breq.l  1, %s37, .L_kmp_invoke
        ld      %s3, 8(, %s38)
        st      %s3, 24(, %s35)

        breq.l  2, %s37, .L_kmp_invoke
        ld      %s4, 16(, %s38)
        st      %s4, 32(, %s35)

        breq.l  3, %s37, .L_kmp_invoke
        ld      %s5, 24(, %s38)
        st      %s5, 40(, %s35)

        breq.l  4, %s37, .L_kmp_invoke
        ld      %s6, 32(, %s38)
        st      %s6, 48(, %s35)

        breq.l  5, %s37, .L_kmp_invoke
        ld      %s7, 40(, %s38)
        st      %s7, 56(, %s35)

        breq.l  6, %s37, .L_kmp_invoke

        // Everything from p_argv[6] onwards exists on the stack only;
        // s37 becomes the number of entries left to copy.
        adds.l  %s37, -6, %s37
        lea     %s38, 48(, %s38)
        lea     %s35, 64(, %s35)
.L_kmp_copy:
        ld      %s36, 0(, %s38)
        st      %s36, 0(, %s35)
        adds.l  %s38, 8, %s38
        adds.l  %s35, 8, %s35
        adds.l  %s37, -1, %s37
        brne.l  0, %s37, .L_kmp_copy

.L_kmp_invoke:
        // Invoke pkfn.
        bsic    %lr, (, %s12)

        // Return value is always 1.
        lea     %s0, 1

        // Drop the frame and return.
        or      %sp, 0, %fp
        ld      %lr, 8(, %sp)
        ld      %fp, 0(, %sp)
        b.l.t   (, %lr)
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_VE */
2254
#if KMP_ARCH_S390X

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Incoming arguments:
//   r2: pkfn
//   r3: gtid
//   r4: tid
//   r5: argc
//   r6: p_argv
//   SP+160: exit_frame_ptr
//
// Stack locals:
//   gtid and tid are stored as doublewords at 160(r12) and 168(r12); the
//   addresses handed to pkfn are 164(r12) and 172(r12), the low words.
//
// Scratch registers:
//   r0:  carries one stack argument from p_argv to the stack
//   r7:  count of p_argv entries still to place
//   r8:  p_argv (exit_frame_ptr for a moment when OMPT is enabled)
//   r9:  pkfn
//   r10: size of the stack-argument area, then the whole frame size
//   r11: stack pointer on entry
//   r12: new stack pointer plus the size of the stack-argument area
//   r13: byte offset used by the copy loop
//
// return: r2 (always 1/TRUE)
//

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align 1
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // Save r6-r15 in the register save area of the caller. r15 has to be
        // part of the store: the CFI below places it at CFA-40, which is
        // 120(r15), and an unwinder reads the stack pointer of the caller
        // from that slot.
        stmg    %r6,%r15,48(%r15)
        .cfi_offset %r6, -112
        .cfi_offset %r7, -104
        .cfi_offset %r8, -96
        .cfi_offset %r9, -88
        .cfi_offset %r10, -80
        .cfi_offset %r11, -72
        .cfi_offset %r12, -64
        .cfi_offset %r13, -56
        .cfi_offset %r14, -48
        .cfi_offset %r15, -40
        lgr     %r11,%r15
        .cfi_def_cfa %r11, 160

        // Dynamic stack size:
        //
        // - 160 bytes of fixed area at the bottom of the new frame
        // - 8 bytes for every 'pkfn' argument that does not fit in a
        //   register. Five argument registers (r[2-6]) serve two + 'argc'
        //   arguments (&gtid and &tid come first), so max(0, argc - 3)
        //   arguments need stack slots; max(0, argc - 2) slots are
        //   reserved, one more than strictly required
        // - 16 bytes above those slots for the gtid and tid doublewords
        //
        // Total: max(0, argc - 2)*8 + 176 bytes

        lgr     %r10,%r5
        aghi    %r10,-2
        jnm     .L_kmp_slots_ok
        lghi    %r10,0                  // argc < 2: no stack-argument slots
.L_kmp_slots_ok:
        sllg    %r10,%r10,3             // slots -> bytes
        lgr     %r12,%r10
        aghi    %r10,176
        sgr     %r15,%r10               // allocate the frame
        agr     %r12,%r15               // r12 = new sp + stack-argument bytes
        stg     %r11,0(%r15)            // entry sp goes to 0(new sp)

        lgr     %r9,%r2                 // pkfn

#if OMPT_SUPPORT
        // *exit_frame_ptr = stack pointer on entry
        lg      %r8,160(%r11)
        stg     %r11,0(%r8)
#endif

        // Arguments for pkfn: the first five travel in r2-r6.

        stg     %r3,160(%r12)
        la      %r2,164(%r12)           // &gtid
        stg     %r4,168(%r12)
        la      %r3,172(%r12)           // &tid
        lgr     %r8,%r6                 // argv

        // argc == 0: nothing more to pass
        ltgr    %r7,%r5
        jz      .L_kmp_call

        lg      %r4,0(%r8)              // argv[0]
        aghi    %r7,-1
        jz      .L_kmp_call

        // argc > 1
        lg      %r5,8(%r8)              // argv[1]
        aghi    %r7,-1
        jz      .L_kmp_call

        // argc > 2
        lg      %r6,16(%r8)             // argv[2]
        aghi    %r7,-1
        jz      .L_kmp_call

        // argc > 3: copy argv[3..] to the stack-argument slots
        lghi    %r13,0                  // byte offset of slot n
.L_kmp_copy:
        lg      %r0,24(%r13,%r8)        // argv[3+n]
        stg     %r0,160(%r13,%r15)      // stack slot n
        aghi    %r13,8                  // next slot
        aghi    %r7,-1
        jnz     .L_kmp_copy

.L_kmp_call:
        basr    %r14,%r9                // call pkfn

        // Drop the frame, restore the saved registers and return 1. r15 is
        // recovered from r11, so reloading r6-r14 is sufficient.

        lgr     %r15,%r11
        lmg     %r6,%r14,48(%r15)
        lghi    %r2,1
        br      %r14
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_S390X */
2407
#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32
// Targets with 4-byte pointers: __kmp_unnamed_critical_addr is a 4-byte
// data word holding the address of the common symbol .gomp_critical_user_.
# ifndef KMP_PREFIX_UNDERSCORE
#  define KMP_PREFIX_UNDERSCORE(x) x
# endif
        .data
        // COMMON is a macro defined elsewhere in this file; presumably the
        // operands are name, size and alignment - confirm against the macro.
        COMMON  .gomp_critical_user_, 32, 3
        .data
        .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
        .align  4
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
        .4byte  .gomp_critical_user_
# ifdef __ELF__
        .size   KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4
# endif
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */
2423
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||                   \
    KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||                 \
    KMP_ARCH_S390X
// Targets with 8-byte pointers: __kmp_unnamed_critical_addr is an 8-byte
// data word holding the address of the common symbol .gomp_critical_user_.
# ifndef KMP_PREFIX_UNDERSCORE
#  define KMP_PREFIX_UNDERSCORE(x) x
# endif
        .data
        // COMMON is a macro defined elsewhere in this file; presumably the
        // operands are name, size and alignment - confirm against the macro.
        COMMON  .gomp_critical_user_, 32, 3
        .data
        .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
        .align  8
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
        .8byte  .gomp_critical_user_
# ifdef __ELF__
        .size   KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
# endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
          KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||
          KMP_ARCH_S390X */
2443
#if KMP_OS_LINUX
// Emit an empty .note.GNU-stack section on Linux. The ARM and AArch64
// variant spells the section type with a percent sign, every other
// non-WASM target uses the at-sign form; WASM gets no such section.
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
        .section .note.GNU-stack,"",%progbits
# elif !KMP_ARCH_WASM
        .section .note.GNU-stack,"",@progbits
# endif
#endif
2451
#if KMP_ARCH_WASM
// WASM: three zero-filled 4-byte objects are defined directly, followed by
// __kmp_unnamed_critical_addr, a 4-byte word holding the address of
// .gomp_critical_user_.
        .data

        .global .gomp_critical_user_
.gomp_critical_user_:
        .zero   4
        .size   .gomp_critical_user_, 4

        .global .gomp_critical_user_.var
.gomp_critical_user_.var:
        .zero   4
        .size   .gomp_critical_user_.var, 4

        .global .gomp_critical_user_.reduction.var
.gomp_critical_user_.reduction.var:
        .zero   4
        .size   .gomp_critical_user_.reduction.var, 4

        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte  .gomp_critical_user_
        .size   __kmp_unnamed_critical_addr, 4
#endif
2471

// source code of openmp/runtime/src/z_Linux_asm.S