1 | // z_AIX_asm.S: - microtasking routines specifically |
2 | // written for Power platforms running AIX OS |
3 | |
4 | // |
5 | ////===----------------------------------------------------------------------===// |
6 | //// |
7 | //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | //// See https://llvm.org/LICENSE.txt for license information. |
9 | //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | //// |
11 | ////===----------------------------------------------------------------------===// |
12 | // |
13 | |
14 | // ----------------------------------------------------------------------- |
15 | // macros |
16 | // ----------------------------------------------------------------------- |
17 | |
18 | #include "kmp_config.h" |
19 | |
20 | #if KMP_OS_AIX |
21 | //------------------------------------------------------------------------ |
22 | // int |
23 | // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), |
24 | // int gtid, int tid, |
25 | // int argc, void *p_argv[] |
26 | // #if OMPT_SUPPORT |
27 | // , |
28 | // void **exit_frame_ptr |
29 | // #endif |
30 | // ) { |
31 | // #if OMPT_SUPPORT |
32 | // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
33 | // #endif |
34 | // |
35 | // (*pkfn)( & gtid, & tid, p_argv[0], ... ); |
36 | // |
37 | // // FIXME: This is done at call-site and can be removed here. |
38 | // #if OMPT_SUPPORT |
39 | // *exit_frame_ptr = 0; |
40 | // #endif |
41 | // |
42 | // return 1; |
43 | // } |
44 | // |
45 | // parameters: |
46 | // r3: pkfn |
47 | // r4: gtid |
48 | // r5: tid |
49 | // r6: argc |
50 | // r7: p_argv |
51 | // r8: exit_frame_ptr (only when OMPT_SUPPORT is enabled) |
52 | // |
53 | // return: r3 (always 1/TRUE) |
54 | // |
55 | |
56 | #if KMP_ARCH_PPC64_XCOFF |
57 | |
58 | .globl __kmp_invoke_microtask[DS] |
59 | .globl .__kmp_invoke_microtask |
60 | .align 4 |
61 | .csect __kmp_invoke_microtask[DS],3 |
62 | .vbyte 8, .__kmp_invoke_microtask |
63 | .vbyte 8, TOC[TC0] |
64 | .vbyte 8, 0 |
65 | .csect .text[PR],2 |
66 | .machine "pwr7" |
67 | .__kmp_invoke_microtask: |
68 | |
69 | |
70 | // -- Begin __kmp_invoke_microtask (XCOFF64 variant) |
71 | // mark_begin; |
72 | |
73 | // We need to allocate a stack frame large enough to hold all of the parameters |
74 | // on the stack for the microtask plus what this function needs. That's 48 |
75 | // bytes of linkage area under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for |
76 | // the parameters to the microtask (gtid, tid, argc elements of p_argv), |
77 | // plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31. |
78 | // With OMP-T support, we need an additional 8 bytes to save r30, which holds |
79 | // a copy of r8 (exit_frame_ptr) across the call. |
80 | // Save-slot offsets relative to the stack pointer on entry (kept in r31): |
81 | // r31: -8, r30: -16, gtid: -20, tid: -24 |
82 | |
83 | mflr 0 |
84 | std 31, -8(1) # Save r31 below the entry stack pointer |
85 | std 0, 16(1) # Save LR to the caller's linkage area |
86 | |
87 | // This is unusual because normally we'd set r31 equal to r1 after the stack |
88 | // frame is established. Here the frame size is computed dynamically from argc, |
89 | // so r31 keeps a copy of the entry value of r1; it is used to address the |
90 | // register save slots and to restore r1 before returning. |
91 | mr 31, 1 |
92 | |
93 | // Compute the size of the "argc" portion of the parameter save area. |
94 | // The parameter save area is always at least 64 bytes long (i.e. 8 regs). |
95 | // The microtask has (2 + argc) parameters, so if argc <= 6, we need to |
96 | // allocate 8*6 bytes, not 8*argc. |
97 | li 0, 6 |
98 | cmpwi 0, 6, 6 |
99 | iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 |
100 | sldi 0, 0, 3 # r0 = 8 * max(argc, 6) |
101 | |
102 | // Compute the size necessary for the local stack frame: r12 = -(88 + r0). |
103 | // 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) + |
104 | // 8 (parameter gtid) + 8 (parameter tid) |
105 | li 12, 88 |
106 | add 12, 0, 12 |
107 | neg 12, 12 |
108 | |
109 | // Keep the stack 16-byte aligned: masking the negative size with -16 rounds the frame size up. |
110 | li 0, -16 |
111 | and 12, 0, 12 |
112 | |
113 | // Establish the local stack frame (stdux stores the old r1 at the new frame base and updates r1). |
114 | stdux 1, 1, 12 |
115 | |
116 | #if OMPT_SUPPORT |
117 | std 30, -16(31) # Save r30; it carries exit_frame_ptr (r8) across the call |
118 | std 1, 0(8) |
119 | mr 30, 8 |
120 | #endif |
121 | |
122 | // Store gtid and tid to the stack because they're passed by reference to the microtask. |
123 | stw 4, -20(31) # Save gtid to the stack |
124 | stw 5, -24(31) # Save tid to the stack |
125 | |
126 | mr 12, 6 # r12 = argc |
127 | mr 4, 7 # r4 = p_argv |
128 | |
129 | cmpwi 0, 12, 1 |
130 | blt 0, .Lcall # if (argc < 1) goto .Lcall |
131 | |
132 | ld 5, 0(4) # r5 = p_argv[0] |
133 | |
134 | cmpwi 0, 12, 2 |
135 | blt 0, .Lcall # if (argc < 2) goto .Lcall |
136 | |
137 | ld 6, 8(4) # r6 = p_argv[1] |
138 | |
139 | cmpwi 0, 12, 3 |
140 | blt 0, .Lcall # if (argc < 3) goto .Lcall |
141 | |
142 | ld 7, 16(4) # r7 = p_argv[2] |
143 | |
144 | cmpwi 0, 12, 4 |
145 | blt 0, .Lcall # if (argc < 4) goto .Lcall |
146 | |
147 | ld 8, 24(4) # r8 = p_argv[3] |
148 | |
149 | cmpwi 0, 12, 5 |
150 | blt 0, .Lcall # if (argc < 5) goto .Lcall |
151 | |
152 | ld 9, 32(4) # r9 = p_argv[4] |
153 | |
154 | cmpwi 0, 12, 6 |
155 | blt 0, .Lcall # if (argc < 6) goto .Lcall |
156 | |
157 | ld 10, 40(4) # r10 = p_argv[5] |
158 | |
159 | cmpwi 0, 12, 7 |
160 | blt 0, .Lcall # if (argc < 7) goto .Lcall |
161 | |
162 | // argc > 6: p_argv[0..5] are already in r5-r10, so the remaining |
163 | // argc - 6 elements must be copied to the stack. |
164 | addi 12, 12, -6 # r12 = argc - 6 (number of elements to copy) |
165 | mtctr 12 |
166 | |
167 | // Both pointers are set 8 bytes before the first address to be accessed (the loop |
168 | // below uses pre-increment loads and stores). The stack-passed part of the parameter |
169 | // save area for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64. |
170 | addi 4, 4, 40 # r4 = p_argv + 5 |
171 | # (ldu pre-increments, so the first element loaded is p_argv[6]) |
172 | addi 12, 1, 104 # r12 = r1 + (112 - 8) |
173 | |
174 | .Lnext: |
175 | ldu 0, 8(4) |
176 | stdu 0, 8(12) |
177 | bdnz .Lnext |
178 | |
179 | .Lcall: |
180 | std 2, 40(1) # Save our TOC pointer to the linkage area |
181 | // pkfn (r3) points to a function descriptor: load entry address, TOC and environment from it. |
182 | ld 12, 0(3) # Function address |
183 | ld 2, 8(3) # TOC pointer |
184 | ld 11, 16(3) # Environment pointer |
185 | |
186 | addi 3, 31, -20 # r3 = address of the saved gtid |
187 | addi 4, 31, -24 # r4 = address of the saved tid |
188 | |
189 | mtctr 12 # CTR = function address |
190 | bctrl # Call the microtask (branch to CTR and link) |
191 | ld 2, 40(1) # Restore TOC pointer from linkage area |
192 | |
193 | #if OMPT_SUPPORT |
194 | li 3, 0 |
195 | std 3, 0(30) |
196 | #endif |
197 | |
198 | li 3, 1 |
199 | |
200 | #if OMPT_SUPPORT |
201 | ld 30, -16(31) # Restore r30 from the saved value on the stack |
202 | #endif |
203 | |
204 | mr 1, 31 |
205 | ld 31, -8(1) # Restore r31 from the saved value on the stack |
206 | ld 0, 16(1) |
207 | mtlr 0 # Restore LR from the linkage area |
208 | blr # Return to the caller (r3 = 1) |
209 | |
210 | #else // KMP_ARCH_PPC_XCOFF |
211 | |
212 | .globl __kmp_invoke_microtask[DS] |
213 | .globl .__kmp_invoke_microtask |
214 | .align 4 |
215 | .csect __kmp_invoke_microtask[DS],2 |
216 | .vbyte 4, .__kmp_invoke_microtask |
217 | .vbyte 4, TOC[TC0] |
218 | .vbyte 4, 0 |
219 | .csect .text[PR],2 |
220 | .machine "pwr7" |
221 | .__kmp_invoke_microtask: |
222 | |
223 | |
224 | // -- Begin __kmp_invoke_microtask (32-bit XCOFF variant) |
225 | // mark_begin; |
226 | |
227 | // We need to allocate a stack frame large enough to hold all of the parameters |
228 | // on the stack for the microtask plus what this function needs. That's 24 |
229 | // bytes of linkage area under the XCOFF ABI, plus max(32, 4*(2 + argc)) for |
230 | // the parameters to the microtask (gtid, tid, argc elements of p_argv), |
231 | // plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31. |
232 | // With OMP-T support, we need an additional 4 bytes to save r30, which holds |
233 | // a copy of r8 (exit_frame_ptr) across the call. |
234 | // Save-slot offsets relative to the stack pointer on entry (kept in r31): |
235 | // r31: -4, r30: -8, gtid: -12, tid: -16 |
236 | |
237 | mflr 0 |
238 | stw 31, -4(1) # Save r31 below the entry stack pointer |
239 | stw 0, 8(1) # Save LR to the caller's linkage area |
240 | |
241 | // This is unusual because normally we'd set r31 equal to r1 after the stack |
242 | // frame is established. Here the frame size is computed dynamically from argc, |
243 | // so r31 keeps a copy of the entry value of r1; it is used to address the |
244 | // register save slots and to restore r1 before returning. |
245 | mr 31, 1 |
246 | |
247 | // Compute the size of the "argc" portion of the parameter save area. |
248 | // The parameter save area is always at least 32 bytes long (i.e. 8 regs). |
249 | // The microtask has (2 + argc) parameters, so if argc <= 6, we need to |
250 | // allocate 4*6 bytes, not 4*argc. |
251 | li 0, 6 |
252 | cmpwi 0, 6, 6 |
253 | iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 |
254 | slwi 0, 0, 2 # r0 = 4 * max(argc, 6) |
255 | |
256 | // Compute the size necessary for the local stack frame: r12 = -(56 + r0). |
257 | // 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) + |
258 | // 4 (parameter gtid) + 4 (parameter tid). NOTE(review): the header comment counts 24 bytes of linkage area, not 32 - confirm. |
259 | li 12, 56 |
260 | add 12, 0, 12 |
261 | neg 12, 12 |
262 | |
263 | // Keep the stack 16-byte aligned: masking the negative size with -16 rounds the frame size up. |
264 | li 0, -16 |
265 | and 12, 0, 12 |
266 | |
267 | // Establish the local stack frame (stwux stores the old r1 at the new frame base and updates r1). |
268 | stwux 1, 1, 12 |
269 | |
270 | #if OMPT_SUPPORT |
271 | stw 30, -8(31) # Save r30; it carries exit_frame_ptr (r8) across the call |
272 | stw 1, 0(8) |
273 | mr 30, 8 |
274 | #endif |
275 | |
276 | // Store gtid and tid to the stack because they're passed by reference to the microtask. |
277 | stw 4, -12(31) # Save gtid to the stack |
278 | stw 5, -16(31) # Save tid to the stack |
279 | |
280 | mr 12, 6 # r12 = argc |
281 | mr 4, 7 # r4 = p_argv |
282 | |
283 | cmpwi 0, 12, 1 |
284 | blt 0, .Lcall # if (argc < 1) goto .Lcall |
285 | |
286 | lwz 5, 0(4) # r5 = p_argv[0] |
287 | |
288 | cmpwi 0, 12, 2 |
289 | blt 0, .Lcall # if (argc < 2) goto .Lcall |
290 | |
291 | lwz 6, 4(4) # r6 = p_argv[1] |
292 | |
293 | cmpwi 0, 12, 3 |
294 | blt 0, .Lcall # if (argc < 3) goto .Lcall |
295 | |
296 | lwz 7, 8(4) # r7 = p_argv[2] |
297 | |
298 | cmpwi 0, 12, 4 |
299 | blt 0, .Lcall # if (argc < 4) goto .Lcall |
300 | |
301 | lwz 8, 12(4) # r8 = p_argv[3] |
302 | |
303 | cmpwi 0, 12, 5 |
304 | blt 0, .Lcall # if (argc < 5) goto .Lcall |
305 | |
306 | lwz 9, 16(4) # r9 = p_argv[4] |
307 | |
308 | cmpwi 0, 12, 6 |
309 | blt 0, .Lcall # if (argc < 6) goto .Lcall |
310 | |
311 | lwz 10, 20(4) # r10 = p_argv[5] |
312 | |
313 | cmpwi 0, 12, 7 |
314 | blt 0, .Lcall # if (argc < 7) goto .Lcall |
315 | |
316 | // argc > 6: p_argv[0..5] are already in r5-r10, so the remaining |
317 | // argc - 6 elements must be copied to the stack. |
318 | addi 12, 12, -6 # r12 = argc - 6 (number of elements to copy) |
319 | mtctr 12 |
320 | |
321 | // Both pointers are set 4 bytes before the first address to be accessed (the loop |
322 | // below uses pre-increment loads and stores). The stack-passed part of the parameter |
323 | // save area for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF. |
324 | addi 4, 4, 20 # r4 = p_argv + 5 |
325 | # (lwzu pre-increments, so the first element loaded is p_argv[6]) |
326 | addi 12, 1, 52 # r12 = r1 + (56 - 4) |
327 | |
328 | .Lnext: |
329 | lwzu 0, 4(4) |
330 | stwu 0, 4(12) |
331 | bdnz .Lnext |
332 | |
333 | .Lcall: |
334 | stw 2, 20(1) # Save our TOC pointer to the linkage area |
335 | // pkfn (r3) points to a function descriptor: load entry address, TOC and environment from it. |
336 | lwz 12, 0(3) # Function address |
337 | lwz 2, 4(3) # TOC pointer |
338 | lwz 11, 8(3) # Environment pointer |
339 | |
340 | addi 3, 31, -12 # r3 = address of the saved gtid |
341 | addi 4, 31, -16 # r4 = address of the saved tid |
342 | |
343 | mtctr 12 # CTR = function address |
344 | bctrl # Call the microtask (branch to CTR and link) |
345 | lwz 2, 20(1) # Restore TOC pointer from linkage area |
346 | |
347 | #if OMPT_SUPPORT |
348 | li 3, 0 |
349 | stw 3, 0(30) |
350 | #endif |
351 | |
352 | li 3, 1 |
353 | |
354 | #if OMPT_SUPPORT |
355 | lwz 30, -8(31) # Restore r30 from the saved value on the stack |
356 | #endif |
357 | |
358 | mr 1, 31 |
359 | lwz 31, -4(1) # Restore r31 from the saved value on the stack |
360 | lwz 0, 8(1) |
361 | mtlr 0 # Restore LR from the linkage area |
362 | blr # Return to the caller (r3 = 1) |
363 | |
364 | #endif // KMP_ARCH_PPC64_XCOFF |
365 | |
366 | .Lfunc_end0: |
367 | .vbyte 4, 0x00000000 # Traceback table begin |
368 | .byte 0x00 # Version = 0 |
369 | .byte 0x09 # Language = CPlusPlus |
370 | .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue |
371 | # +HasTraceBackTableOffset, -IsInternalProcedure |
372 | # -HasControlledStorage, -IsTOCless |
373 | # -IsFloatingPointPresent |
374 | # -IsFloatingPointOperationLogOrAbortEnabled |
375 | .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed |
376 | # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved |
377 | .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 |
378 | #if OMPT_SUPPORT |
379 | .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2 (r30, r31) |
380 | .byte 0x06 # NumberOfFixedParms = 6 (pkfn, gtid, tid, argc, p_argv, exit_frame_ptr) |
381 | #else |
382 | .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 (r31) |
383 | .byte 0x05 # NumberOfFixedParms = 5 (pkfn, gtid, tid, argc, p_argv) |
384 | #endif |
385 | .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack |
386 | .vbyte 4, 0x00000000 # Parameter type = i, i, i, i, i (all-zero word) |
387 | .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask # Function size (entry label to .Lfunc_end0) |
388 | .vbyte 2, 0x0016 # Function name len = 22 |
389 | .byte "__kmp_invoke_microtask" # Function Name |
390 | .byte 0x1f # AllocaRegister = 31 (r31 holds the entry stack pointer) |
391 | # -- End function |
392 | |
393 | // -- End __kmp_invoke_microtask |
394 | |
395 | // Support for unnamed common blocks: define the 32-byte common symbol .gomp_critical_user_ and a global csect __kmp_unnamed_critical_addr that holds its address. |
396 | |
397 | .comm .gomp_critical_user_, 32, 3 |
398 | #if KMP_ARCH_PPC64_XCOFF |
399 | .csect __kmp_unnamed_critical_addr[RW],3 |
400 | #else |
401 | .csect __kmp_unnamed_critical_addr[RW],2 |
402 | #endif |
403 | .globl __kmp_unnamed_critical_addr[RW] |
404 | .ptr .gomp_critical_user_ |
405 | |
406 | // -- End unnamed common block |
407 | |
408 | .toc |
409 | |
410 | #endif // KMP_OS_AIX |
411 | |